Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 4 additions & 2 deletions packages/php-datatypes/src/Definition/Common.php
Original file line number Diff line number Diff line change
Expand Up @@ -93,10 +93,12 @@ public function toMetadata(): array
[
'key' => self::KBC_METADATA_KEY_TYPE,
'value' => $this->getType(),
],[
],
[
'key' => self::KBC_METADATA_KEY_NULLABLE,
'value' => $this->isNullable(),
],[
],
[
'key' => self::KBC_METADATA_KEY_BASETYPE,
'value' => $this->getBasetype(),
],
Expand Down
8 changes: 8 additions & 0 deletions packages/php-datatypes/src/Definition/DefinitionInterface.php
Original file line number Diff line number Diff line change
Expand Up @@ -15,5 +15,13 @@ public function toArray(): array;

public function getBasetype(): string;

public function getType(): string;

public function getLength(): ?string;

public function isNullable(): bool;

public function getDefault(): ?string;

Comment on lines +18 to +25
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nechápu, proč to není v tom interface.

public static function getTypeByBasetype(string $basetype): string;
}
Original file line number Diff line number Diff line change
Expand Up @@ -31,11 +31,24 @@ public static function covertException(Throwable $e): Throwable
}

// phpcs:ignore
$isNullInNotNullCol = preg_match('/NULL result in a non-nullable column/', $e->getMessage(), $output_array) === 1;
$isNotRecognized = preg_match('/ \'(.*)\' is not recognized/', $e->getMessage(), $output_array) === 1;
if ($isNullInNotNullCol || $isNotRecognized) {
$message = $e->getMessage();
$isObjectCastFail = preg_match('/Failed to cast variant value .* to OBJECT/', $message, $output_array) === 1;
if ($isObjectCastFail) {
// remove variant from message as it would confuse users
// we are using TO_OBJECT(TO_VARIANT(...)) casting combination
$message = str_replace('variant ', '', $message);
}
$isInvalidGeo = preg_match('/Error parsing Geo input/', $message, $output_array) === 1;
// phpcs:ignore
$isInvalidBinary = preg_match('/The following string is not a legal hex-encoded value/', $message, $output_array) === 1;
$isNullInNotNullCol = preg_match('/NULL result in a non-nullable column/', $message, $output_array) === 1;
$isNotRecognized = preg_match('/ \'(.*)\' is not recognized/', $message, $output_array) === 1;
if ($isNotRecognized) {
$message .= '. Value you are trying to load cannot be converted to used datatype.';
}
if ($isNullInNotNullCol || $isNotRecognized || $isInvalidBinary || $isInvalidGeo || $isObjectCastFail) {
return new Exception(
'Load error: ' . $e->getMessage(),
'Load error: ' . $message,
Exception::VALUE_CONVERSION,
$e
);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ public function __construct(
int $numberOfIgnoredLines = 0,
bool $requireSameTables = self::SAME_TABLES_NOT_REQUIRED,
bool $nullManipulation = self::NULL_MANIPULATION_ENABLED,
array $ignoreColumns = []
array $ignoreColumns = [],
) {
parent::__construct(
$convertEmptyValuesToNull,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,17 +8,19 @@
use Keboola\Datatype\Definition\Snowflake;
use Keboola\Db\ImportExport\Backend\Snowflake\Helper\QuoteHelper;
use Keboola\Db\ImportExport\Backend\Snowflake\SnowflakeImportOptions;
use Keboola\Db\ImportExport\Backend\SourceDestinationColumnMap;
use Keboola\Db\ImportExport\Backend\ToStageImporterInterface;
use Keboola\Db\ImportExport\ImportOptionsInterface;
use Keboola\Db\ImportExport\Storage\Snowflake\Table;
use Keboola\Db\ImportExport\Storage\SourceInterface;
use Keboola\TableBackendUtils\Column\Snowflake\SnowflakeColumn;
use Keboola\TableBackendUtils\Escaping\Exasol\ExasolQuote;
use Keboola\TableBackendUtils\Escaping\Snowflake\SnowflakeQuote;
use Keboola\TableBackendUtils\Table\Snowflake\SnowflakeTableDefinition;

class SqlBuilder
{
private const AUTO_CASTING_TYPES = [
Snowflake::TYPE_VARIANT,
Snowflake::TYPE_OBJECT,
Snowflake::TYPE_ARRAY,
];
public const SRC_ALIAS = 'src';

public function getBeginTransaction(): string
Expand Down Expand Up @@ -165,6 +167,11 @@ public function getInsertAllIntoTargetTableCommand(
SnowflakeImportOptions $importOptions,
string $timestamp
): string {
$columnMap = SourceDestinationColumnMap::createForTables(
$sourceTableDefinition,
$destinationTableDefinition,
$importOptions->ignoreColumns()
);
$destinationTable = sprintf(
'%s.%s',
SnowflakeQuote::quoteSingleIdentifier($destinationTableDefinition->getSchemaName()),
Expand All @@ -184,44 +191,67 @@ public function getInsertAllIntoTargetTableCommand(

$columnsSetSql = [];

/** @var SnowflakeColumn $columnDefinition */
foreach ($sourceTableDefinition->getColumnsDefinitions() as $columnDefinition) {
/** @var SnowflakeColumn $sourceColumn */
foreach ($sourceTableDefinition->getColumnsDefinitions() as $sourceColumn) {
// output mapping same tables are required do not convert nulls to empty strings
if (!$importOptions->isNullManipulationEnabled()) {
$columnsSetSql[] = SnowflakeQuote::quoteSingleIdentifier($columnDefinition->getColumnName());
$destinationColumn = $columnMap->getDestination($sourceColumn);
$type = $destinationColumn->getColumnDefinition()->getType();
$useAutoCast = in_array($type, self::AUTO_CASTING_TYPES, true);
$isSameType = $type === $sourceColumn->getColumnDefinition()->getType();
if ($useAutoCast && !$isSameType) {
if ($type === Snowflake::TYPE_OBJECT) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

tady ocekavame to ze pro dalsi edgecasy co prijdou, tak se to bude vetvit, ze? Nechces to nekam vycuknout? kdyz to nechame takto, tak s dalsim typem prijde copypaste stejneho ifu. Ale asi je to pre-mature... necham na tobe

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Prakticky jsem teď přidal test pro GEOMETRY a GEOGRAPHY; víc takových typů tam není, takže to dost pravděpodobně vůbec nebude potřeba pro nic dalšího.

// object can't be casted from string but can be casted from variant
$columnsSetSql[] = sprintf(
'CAST(TO_VARIANT(%s) AS %s) AS %s',
SnowflakeQuote::quoteSingleIdentifier($sourceColumn->getColumnName()),
$destinationColumn->getColumnDefinition()->getSQLDefinition(),
SnowflakeQuote::quoteSingleIdentifier($destinationColumn->getColumnName())
);
continue;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

ta metoda je takto nadesignovana, ale ten ty continue misto trochu lepe organizovaneho if else mi prijdou strasne neprehledne

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

if else by byl hrozný hell, možeš to zkusit jak by to vypadalo :) Možná match(true) by to trochu pošéfoval, ale takto jdeš po těch podmínkách a máš early return přes continue. Když je to "flat" přes else if tak bude strašně komplikované ty elseif poskládat správně pod sebe.

}
$columnsSetSql[] = sprintf(
'CAST(%s AS %s) AS %s',
SnowflakeQuote::quoteSingleIdentifier($sourceColumn->getColumnName()),
$destinationColumn->getColumnDefinition()->getSQLDefinition(),
SnowflakeQuote::quoteSingleIdentifier($destinationColumn->getColumnName())
);
continue;
}
$columnsSetSql[] = SnowflakeQuote::quoteSingleIdentifier($sourceColumn->getColumnName());
continue;
}

// Input mapping convert empty values to null
// empty strings '' are converted to null values
if (in_array($columnDefinition->getColumnName(), $importOptions->getConvertEmptyValuesToNull(), true)) {
if (in_array($sourceColumn->getColumnName(), $importOptions->getConvertEmptyValuesToNull(), true)) {
// use nullif only for string base type
if ($columnDefinition->getColumnDefinition()->getBasetype() === BaseType::STRING) {
if ($sourceColumn->getColumnDefinition()->getBasetype() === BaseType::STRING) {
$columnsSetSql[] = sprintf(
'IFF(%s = \'\', NULL, %s)',
SnowflakeQuote::quoteSingleIdentifier($columnDefinition->getColumnName()),
SnowflakeQuote::quoteSingleIdentifier($columnDefinition->getColumnName())
SnowflakeQuote::quoteSingleIdentifier($sourceColumn->getColumnName()),
SnowflakeQuote::quoteSingleIdentifier($sourceColumn->getColumnName())
);
continue;
}
// if tables is not typed column could be other than string in this case we skip conversion
$columnsSetSql[] = SnowflakeQuote::quoteSingleIdentifier($columnDefinition->getColumnName());
$columnsSetSql[] = SnowflakeQuote::quoteSingleIdentifier($sourceColumn->getColumnName());
continue;
}

// for string base type convert null values to empty string ''
//phpcs:ignore
if (!$importOptions->usingUserDefinedTypes() && $columnDefinition->getColumnDefinition()->getBasetype() === BaseType::STRING) {
if (!$importOptions->usingUserDefinedTypes() && $sourceColumn->getColumnDefinition()->getBasetype() === BaseType::STRING) {
$columnsSetSql[] = sprintf(
'COALESCE(%s, \'\') AS %s',
SnowflakeQuote::quoteSingleIdentifier($columnDefinition->getColumnName()),
SnowflakeQuote::quoteSingleIdentifier($columnDefinition->getColumnName())
SnowflakeQuote::quoteSingleIdentifier($sourceColumn->getColumnName()),
SnowflakeQuote::quoteSingleIdentifier($sourceColumn->getColumnName())
);
continue;
}
// on columns other than string dont use COALESCE
// this will fail if the column is not null, but this is expected
$columnsSetSql[] = SnowflakeQuote::quoteSingleIdentifier($columnDefinition->getColumnName());
$columnsSetSql[] = SnowflakeQuote::quoteSingleIdentifier($sourceColumn->getColumnName());
}

if ($useTimestamp) {
Expand Down Expand Up @@ -256,29 +286,58 @@ public function getUpdateWithPkCommand(
SnowflakeImportOptions $importOptions,
string $timestamp
): string {
$columnMap = SourceDestinationColumnMap::createForTables(
$stagingTableDefinition,
$destinationDefinition,
$importOptions->ignoreColumns()
);
$columnsSet = [];

foreach ($stagingTableDefinition->getColumnsNames() as $columnName) {
foreach ($stagingTableDefinition->getColumnsDefinitions() as $sourceColumn) {
if (!$importOptions->isNullManipulationEnabled()) {
$destinationColumn = $columnMap->getDestination($sourceColumn);
$type = $destinationColumn->getColumnDefinition()->getType();
$useAutoCast = in_array($type, self::AUTO_CASTING_TYPES, true);
$isSameType = $type === $sourceColumn->getColumnDefinition()->getType();
if ($useAutoCast && !$isSameType) {
if ($type === Snowflake::TYPE_OBJECT) {
// object can't be casted from string but can be casted from variant
$columnsSet[] = sprintf(
'%s = CAST(TO_VARIANT("src".%s) AS %s)',
SnowflakeQuote::quoteSingleIdentifier($destinationColumn->getColumnName()),
SnowflakeQuote::quoteSingleIdentifier($sourceColumn->getColumnName()),
$destinationColumn->getColumnDefinition()->getSQLDefinition(),
);
continue;
}
$columnsSet[] = sprintf(
'%s = CAST("src".%s AS %s)',
SnowflakeQuote::quoteSingleIdentifier($destinationColumn->getColumnName()),
SnowflakeQuote::quoteSingleIdentifier($sourceColumn->getColumnName()),
$destinationColumn->getColumnDefinition()->getSQLDefinition(),
);
continue;
}

$columnsSet[] = sprintf(
'%s = "src".%s',
SnowflakeQuote::quoteSingleIdentifier($columnName),
SnowflakeQuote::quoteSingleIdentifier($columnName),
SnowflakeQuote::quoteSingleIdentifier($sourceColumn->getColumnName()),
SnowflakeQuote::quoteSingleIdentifier($sourceColumn->getColumnName()),
);
continue;
}
if (in_array($columnName, $importOptions->getConvertEmptyValuesToNull(), true)) {
if (in_array($sourceColumn->getColumnName(), $importOptions->getConvertEmptyValuesToNull(), true)) {
$columnsSet[] = sprintf(
'%s = IFF("src".%s = \'\', NULL, "src".%s)',
SnowflakeQuote::quoteSingleIdentifier($columnName),
SnowflakeQuote::quoteSingleIdentifier($columnName),
SnowflakeQuote::quoteSingleIdentifier($columnName)
SnowflakeQuote::quoteSingleIdentifier($sourceColumn->getColumnName()),
SnowflakeQuote::quoteSingleIdentifier($sourceColumn->getColumnName()),
SnowflakeQuote::quoteSingleIdentifier($sourceColumn->getColumnName())
);
} else {
$columnsSet[] = sprintf(
'%s = COALESCE("src".%s, \'\')',
SnowflakeQuote::quoteSingleIdentifier($columnName),
SnowflakeQuote::quoteSingleIdentifier($columnName)
SnowflakeQuote::quoteSingleIdentifier($sourceColumn->getColumnName()),
SnowflakeQuote::quoteSingleIdentifier($sourceColumn->getColumnName())
);
}
}
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,118 @@
<?php

declare(strict_types=1);

namespace Keboola\Db\ImportExport\Backend;

use Error;
use Exception;
use Generator;
use Keboola\Db\ImportExport\Exception\ColumnsMismatchException;
use Keboola\TableBackendUtils\Column\ColumnCollection;
use Keboola\TableBackendUtils\Column\ColumnInterface;
use Keboola\TableBackendUtils\Table\TableDefinitionInterface;
use WeakMap;

/**
* Maps each source table column to its destination table column based on column order, skipping ignored columns.
*/
final class SourceDestinationColumnMap
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Tady jsem to mapování source->destination sloupců potřeboval dostat někam ven.

{
/**
* @var WeakMap<ColumnInterface,ColumnInterface>
*/
private WeakMap $map;

/**
* Stores both column collections and immediately builds the source -> destination map.
*
* @param ColumnCollection $source columns of the source table
* @param ColumnCollection $destination columns of the destination table
* @param string[] $ignoreColumns names of columns that are skipped on both sides while pairing
* @throws ColumnsMismatchException raised by buildMap() when one side runs out of columns before the other
*/
public function __construct(
private readonly ColumnCollection $source,
private readonly ColumnCollection $destination,
private readonly array $ignoreColumns = [],
) {
// the map has to exist before buildMap() starts writing pairs into it
$this->map = new WeakMap();
$this->buildMap();
}

/**
 * Convenience factory: builds the map from two table definitions
 * by taking the column definitions of each of them.
 *
 * @param string[] $ignoreColumns names of columns that are skipped on both sides while pairing
 */
public static function createForTables(
    TableDefinitionInterface $source,
    TableDefinitionInterface $destination,
    array $ignoreColumns = [],
): self {
    $sourceColumns = $source->getColumnsDefinitions();
    $destinationColumns = $destination->getColumnsDefinitions();

    return new self($sourceColumns, $destinationColumns, $ignoreColumns);
}

/**
 * Walks the source and destination columns in parallel and pairs them by position.
 *
 * Ignored columns are skipped on either side before each pairing. When only one
 * of the two iterators still has a column left, the tables do not line up and
 * a columns-count mismatch exception is thrown.
 */
private function buildMap(): void
{
    $sourceIt = $this->source->getIterator();
    $destinationIt = $this->destination->getIterator();

    while ($sourceIt->valid() || $destinationIt->valid()) {
        // skip ignored columns on the source side first, then on the destination side
        $sourceIt = $this->ignoreColumn($sourceIt, $destinationIt);
        if ($sourceIt === false) {
            // skipping exhausted both sides, nothing left to pair
            return;
        }
        $destinationIt = $this->ignoreColumn($destinationIt, $sourceIt);
        if ($destinationIt === false) {
            return;
        }

        if (!$sourceIt->valid() || !$destinationIt->valid()) {
            // one side still has a column while the other one is already exhausted
            throw ColumnsMismatchException::createColumnsCountMismatch($this->source, $this->destination);
        }

        /** @var ColumnInterface $sourceColumn */
        $sourceColumn = $sourceIt->current();
        /** @var ColumnInterface $destinationColumn */
        $destinationColumn = $destinationIt->current();
        $this->map[$sourceColumn] = $destinationColumn;

        $sourceIt->next();
        $destinationIt->next();
    }
}

/**
 * Returns the destination column that was paired with the given source column.
 *
 * The map is keyed by the column objects themselves, so $source has to be one of
 * the instances that were in the source collection when the map was built.
 *
 * @throws Exception when the given column object has no entry in the map
 */
public function getDestination(ColumnInterface $source): ColumnInterface
{
    try {
        $destination = $this->map[$source];
    } catch (Error $e) {
        // this can happen only when class is used with different source and destination tables instances;
        // the original error is chained as "previous" so the root cause is not lost
        throw new Exception(
            sprintf('Column "%s" not found in destination table', $source->getColumnName()),
            0,
            $e,
        );
    }
    assert($destination !== null);
    return $destination;
}

/**
 * Advances $it0 past every consecutive column whose name is in the ignore list.
 *
 * @param Generator<int, ColumnInterface> $it0 iterator that is advanced
 * @param Generator<int, ColumnInterface> $it1 opposite iterator, only checked for exhaustion
 * @return Generator<int, ColumnInterface>|false $it0 itself, or false when at least one column
 *     was skipped and both iterators are exhausted afterwards
 */
private function ignoreColumn(Generator $it0, Generator $it1): Generator|false
{
    $skippedAny = false;
    while ($this->isIgnoredColumn($it0)) {
        $it0->next();
        $skippedAny = true;
    }

    // exhaustion is reported only when something was actually skipped
    if ($skippedAny && !($it0->valid() || $it1->valid())) {
        return false;
    }

    return $it0;
}

/**
 * Tells whether the iterator currently points at a column whose name is in the ignore list.
 * An exhausted iterator is never considered ignored.
 *
 * @param Generator<int, ColumnInterface> $it
 */
private function isIgnoredColumn(Generator $it): bool
{
    if (!$it->valid()) {
        return false;
    }

    $currentName = $it->current()->getColumnName();

    return in_array($currentName, $this->ignoreColumns, true);
}
}
Loading