Skip to content

[faq] upsert from DataFrame to existing table

Gord Thompson edited this page Jan 10, 2022 · 5 revisions

Access SQL does not support special "upsert" syntax (e.g., ON DUPLICATE KEY or INSERT ... ON CONFLICT) but we can upsert from a DataFrame to Access using a temporary table and an UPDATE statement with an outer join. For example, if the existing table has a 2-column composite primary key the code would be something like this:

import pandas as pd
import sqlalchemy as sa
import sqlalchemy_access as sa_a

# ...

main_table_name = "Branch"
temp_table_name = "zzzUpsertTemp"

# initial state
with engine.begin() as conn:
    print(conn.exec_driver_sql(f"SELECT * FROM {main_table_name}").fetchall())
    """console output:
    [('ATB', 'Calgary', '555 5th St NE', 5), 
     ('TD', 'Calgary', '???', -1)]
    """
# test data
data_update = pd.DataFrame(
    [
        ("ATB", "Medicine Hat", "ADDED: 333 3rd St", 3),
        ("TD", "Calgary", "UPDATED: 1717 17th Ave SW", 7),
    ],
    columns=["Institution", "City", "Address", "ATMs"],
)

# step 1: upload to temporary table
data_update.to_sql(
    temp_table_name,
    engine,
    if_exists="replace",
    index=False,
    dtype={
        # override default LongText type for (PK) columns to be JOINed
        "Institution": sa_a.ShortText(50),
        "City": sa_a.ShortText(50),
    },
)
with engine.begin() as conn:
    #
    # step 2: upsert
    # ref: https://stackoverflow.com/a/37014004/2144390
    # note that the primary key for the main table is (Institution, City)
    statement = f"""\
        UPDATE 
            {main_table_name} main
            RIGHT JOIN 
            {temp_table_name} tmp
            ON main.Institution = tmp.Institution 
                AND main.City = tmp.City
        SET main.Institution = tmp.Institution,
            main.City = tmp.City,
            main.Address = tmp.Address, 
            main.ATMs = tmp.ATMs
        """
    conn.exec_driver_sql(statement)
# exit "with" block to trigger commit

# step 3: clean up
with engine.begin() as conn:
    conn.exec_driver_sql(f"DROP TABLE {temp_table_name}")

# final state
with engine.begin() as conn:
    print(conn.exec_driver_sql(f"SELECT * FROM {main_table_name}").fetchall())
    """console output:
    [('ATB', 'Calgary', '555 5th St NE', 5), 
     ('TD', 'Calgary', 'UPDATED: 1717 17th Ave SW', 7), 
     ('ATB', 'Medicine Hat', 'ADDED: 333 3rd St', 3)]
    """