1919
2020package org .apache .iceberg .mr .hive ;
2121
22+ import com .fasterxml .jackson .databind .JsonNode ;
2223import com .fasterxml .jackson .databind .ObjectMapper ;
2324import java .util .Arrays ;
2425import java .util .Collection ;
4142import org .apache .hadoop .hive .metastore .api .hive_metastoreConstants ;
4243import org .apache .hadoop .hive .ql .ddl .misc .sortoder .SortFieldDesc ;
4344import org .apache .hadoop .hive .ql .ddl .misc .sortoder .SortFields ;
45+ import org .apache .hadoop .hive .ql .ddl .misc .sortoder .ZOrderFieldDesc ;
46+ import org .apache .hadoop .hive .ql .ddl .misc .sortoder .ZOrderFields ;
4447import org .apache .hadoop .hive .ql .util .NullOrdering ;
4548import org .apache .iceberg .BaseMetastoreTableOperations ;
4649import org .apache .iceberg .BaseTable ;
7477import org .slf4j .LoggerFactory ;
7578
7679import static org .apache .iceberg .RowLevelOperationMode .MERGE_ON_READ ;
80+ import static org .apache .iceberg .mr .InputFormatConfig .SORT_COLUMNS ;
81+ import static org .apache .iceberg .mr .InputFormatConfig .SORT_ORDER ;
82+ import static org .apache .iceberg .mr .InputFormatConfig .ZORDER ;
7783
7884public class BaseHiveIcebergMetaHook implements HiveMetaHook {
7985 private static final Logger LOG = LoggerFactory .getLogger (BaseHiveIcebergMetaHook .class );
@@ -84,6 +90,7 @@ public class BaseHiveIcebergMetaHook implements HiveMetaHook {
8490 private static final Set <String > PARAMETERS_TO_REMOVE = ImmutableSet
8591 .of (InputFormatConfig .TABLE_SCHEMA , Catalogs .LOCATION , Catalogs .NAME , InputFormatConfig .PARTITION_SPEC );
8692 static final String ORC_FILES_ONLY = "iceberg.orc.files.only" ;
93+ private static final String ZORDER_FIELDS_JSON_KEY = "zorderFields" ;
8794
8895 protected final Configuration conf ;
8996 protected Table icebergTable = null ;
@@ -217,28 +224,82 @@ private void validateCatalogConfigsDefined() {
217224 }
218225 }
219226
227+ /**
228+ * Persists the table's write sort order based on the HMS property 'default-sort-order'
229+ * that is populated by the DDL layer.
230+ * <p>
231+ * Behaviour:
232+ * - If the JSON represents Z-order, we remove DEFAULT_SORT_ORDER
233+ * as Iceberg does not have Z-order support in its spec.
234+ * So, we persist Z-order metadata in {@link org.apache.iceberg.mr.InputFormatConfig#SORT_ORDER}
235+ * and {@link org.apache.iceberg.mr.InputFormatConfig#SORT_COLUMNS} to be used by Hive Writer.
236+ * <p>
237+ * - Otherwise, the JSON is a list of SortFields; we convert it to Iceberg
238+ * SortOrder JSON and keep it in DEFAULT_SORT_ORDER for Iceberg to use it.
239+ */
220240 private void setSortOrder (org .apache .hadoop .hive .metastore .api .Table hmsTable , Schema schema ,
221241 Properties properties ) {
222- String sortOderJSONString = hmsTable .getParameters ().get (TableProperties .DEFAULT_SORT_ORDER );
223- SortFields sortFields = null ;
224- if (!Strings .isNullOrEmpty (sortOderJSONString )) {
225- try {
226- sortFields = JSON_OBJECT_MAPPER .reader ().readValue (sortOderJSONString , SortFields .class );
227- } catch (Exception e ) {
228- LOG .warn ("Can not read write order json: {}" , sortOderJSONString , e );
229- return ;
230- }
242+ String sortOrderJSONString = hmsTable .getParameters ().get (TableProperties .DEFAULT_SORT_ORDER );
243+ if (Strings .isNullOrEmpty (sortOrderJSONString )) {
244+ return ;
245+ }
246+
247+ if (isZOrderJSON (sortOrderJSONString )) {
248+ properties .remove (TableProperties .DEFAULT_SORT_ORDER );
249+ setZOrderSortOrder (sortOrderJSONString , properties , hmsTable .getTableName ());
250+ return ;
251+ }
252+
253+ try {
254+ SortFields sortFields = JSON_OBJECT_MAPPER .reader ().readValue (sortOrderJSONString , SortFields .class );
231255 if (sortFields != null && !sortFields .getSortFields ().isEmpty ()) {
232- SortOrder .Builder sortOderBuilder = SortOrder .builderFor (schema );
256+ SortOrder .Builder sortOrderBuilder = SortOrder .builderFor (schema );
233257 sortFields .getSortFields ().forEach (fieldDesc -> {
234258 NullOrder nullOrder = fieldDesc .getNullOrdering () == NullOrdering .NULLS_FIRST ?
235259 NullOrder .NULLS_FIRST : NullOrder .NULLS_LAST ;
236260 SortDirection sortDirection = fieldDesc .getDirection () == SortFieldDesc .SortDirection .ASC ?
237261 SortDirection .ASC : SortDirection .DESC ;
238- sortOderBuilder .sortBy (fieldDesc .getColumnName (), sortDirection , nullOrder );
262+ sortOrderBuilder .sortBy (fieldDesc .getColumnName (), sortDirection , nullOrder );
239263 });
240- properties .put (TableProperties .DEFAULT_SORT_ORDER , SortOrderParser .toJson (sortOderBuilder .build ()));
264+ properties .put (TableProperties .DEFAULT_SORT_ORDER , SortOrderParser .toJson (sortOrderBuilder .build ()));
265+ }
266+ } catch (Exception e ) {
267+ LOG .warn ("Can not read write order json: {}" , sortOrderJSONString );
268+ }
269+ }
270+
271+ /**
272+ * Configures the Z-order sort order metadata in the given properties
273+ * based on the specified Z-order fields.
274+ *
275+ * @param jsonString the JSON string representing sort orders
276+ * @param properties the Properties object to store sort order metadata
277+ * @param tableName name of the table
278+ */
279+ private void setZOrderSortOrder (String jsonString , Properties properties , String tableName ) {
280+ try {
281+ ZOrderFields zorderFields = JSON_OBJECT_MAPPER .reader ().readValue (jsonString , ZOrderFields .class );
282+ if (zorderFields != null && !zorderFields .getZOrderFields ().isEmpty ()) {
283+ List <String > columnNames = zorderFields .getZOrderFields ().stream ()
284+ .map (ZOrderFieldDesc ::getColumnName )
285+ .collect (Collectors .toList ());
286+
287+ properties .put (SORT_ORDER , ZORDER );
288+ properties .put (SORT_COLUMNS , String .join ("," , columnNames ));
289+
290+ LOG .debug ("Applying Z-ordering for Iceberg Table {} with Columns: {}" , tableName , columnNames );
241291 }
292+ } catch (Exception e ) {
293+ LOG .warn ("Failed to parse Z-order sort order" , e );
294+ }
295+ }
296+
297+ private boolean isZOrderJSON (String jsonString ) {
298+ try {
299+ JsonNode node = JSON_OBJECT_MAPPER .readTree (jsonString );
300+ return node .has (ZORDER_FIELDS_JSON_KEY );
301+ } catch (Exception e ) {
302+ return false ;
242303 }
243304 }
244305
0 commit comments