Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
<Project Sdk="Microsoft.NET.Sdk">

<!-- Build settings: console executable targeting .NET 10 with implicit usings and nullable reference types enabled. -->
<PropertyGroup>
<OutputType>Exe</OutputType>
<TargetFramework>net10.0</TargetFramework>
<ImplicitUsings>enable</ImplicitUsings>
<Nullable>enable</Nullable>
</PropertyGroup>

<!-- Microsoft.Extensions packages for configuration (JSON file), dependency injection and console logging. -->
<ItemGroup>
<PackageReference Include="Microsoft.Extensions.Configuration" Version="10.0.1" />
<PackageReference Include="Microsoft.Extensions.Configuration.Json" Version="10.0.1" />
<PackageReference Include="Microsoft.Extensions.DependencyInjection" Version="10.0.1" />
<PackageReference Include="Microsoft.Extensions.Logging" Version="10.0.1" />
<PackageReference Include="Microsoft.Extensions.Logging.Console" Version="10.0.1" />
</ItemGroup>

<!-- Sibling DataAccessProvider projects: the core library plus the MSSQL, Postgres and MySql providers. -->
<ItemGroup>
<ProjectReference Include="..\DataAccessProvider.Core\DataAccessProvider.Core.csproj" />
<ProjectReference Include="..\DataAccessProvider.MSSQL\DataAccessProvider.MSSQL.csproj" />
<ProjectReference Include="..\DataAccessProvider.Postgres\DataAccessProvider.Postgres.csproj" />
<ProjectReference Include="..\DataAccessProvider.MySql\DataAccessProvider.MySql.csproj" />
</ItemGroup>

<!-- Copy appsettings.json to the output directory (only when newer) so it sits next to the built executable at run time. -->
<ItemGroup>
<None Update="appsettings.json">
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
</None>
</ItemGroup>

</Project>
52 changes: 52 additions & 0 deletions DataAccessProvider.TsvImporter/Models/ImdbPerson.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
namespace DataAccessProvider.TsvImporter.Models;

/// <summary>
/// Represents a person from the IMDb name.basics.tsv dataset
/// </summary>
public class ImdbPerson
{
    /// <summary>
    /// Alphanumeric unique identifier of the name/person (e.g., nm0000001)
    /// </summary>
    public string Nconst { get; set; } = string.Empty;

    /// <summary>
    /// Name by which the person is most often credited
    /// </summary>
    public string PrimaryName { get; set; } = string.Empty;

    /// <summary>
    /// Birth year in YYYY format, null if not available
    /// </summary>
    public int? BirthYear { get; set; }

    /// <summary>
    /// Death year in YYYY format, null if not applicable or not available
    /// </summary>
    public int? DeathYear { get; set; }

    /// <summary>
    /// The top-3 professions of the person (comma-separated in TSV)
    /// </summary>
    public List<string> PrimaryProfession { get; set; } = new();

    /// <summary>
    /// Titles the person is known for (comma-separated tconsts in TSV)
    /// </summary>
    public List<string> KnownForTitles { get; set; } = new();

    /// <summary>
    /// Gets primary profession as comma-separated string for database storage
    /// </summary>
    public string PrimaryProfessionString => string.Join(",", PrimaryProfession);

    /// <summary>
    /// Gets known for titles as comma-separated string for database storage
    /// </summary>
    public string KnownForTitlesString => string.Join(",", KnownForTitles);

    /// <summary>
    /// Formats the person as "<c>Nconst: PrimaryName (birth-death)</c>".
    /// </summary>
    /// <returns>
    /// The identifier and name, followed by the known years in parentheses. The death year
    /// is omitted when it is null, an unknown birth year next to a known death year is
    /// shown as "?", and the parenthetical is dropped entirely when neither year is known.
    /// </returns>
    public override string ToString()
    {
        // With no year information at all there is nothing to put in the parentheses;
        // emitting "()" would only add noise to log and console output.
        if (BirthYear is null && DeathYear is null)
        {
            return $"{Nconst}: {PrimaryName}";
        }

        // A death year without a birth year would otherwise render as "(-1950)", which
        // reads like a negative number; use an explicit placeholder instead.
        var born = BirthYear.HasValue ? $"{BirthYear}" : "?";
        var died = DeathYear.HasValue ? $"-{DeathYear}" : "";

        return $"{Nconst}: {PrimaryName} ({born}{died})";
    }
}
194 changes: 194 additions & 0 deletions DataAccessProvider.TsvImporter/Program.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,194 @@
using DataAccessProvider.Core.Extensions;
using DataAccessProvider.Core.Interfaces;
using DataAccessProvider.MSSQL;
using DataAccessProvider.MySql;
using DataAccessProvider.Postgres;
using DataAccessProvider.TsvImporter.Models;
using DataAccessProvider.TsvImporter.Services;
using Microsoft.Extensions.Configuration;
using Microsoft.Extensions.DependencyInjection;
using Microsoft.Extensions.Logging;

namespace DataAccessProvider.TsvImporter;

/// <summary>
/// Console entry point that reads people from an IMDb TSV file and inserts them into the
/// selected database in batches.
/// </summary>
class Program
{
    /// <summary>
    /// Loads settings from appsettings.json, applies optional command-line overrides,
    /// creates the schema if the table does not exist, and imports the TSV file in batches.
    /// </summary>
    /// <param name="args">
    /// Optional positional overrides: <c>[tsvFilePath] [databaseType] [batchSize]</c>.
    /// </param>
    /// <returns>A task that completes when the import has finished or failed.</returns>
    /// <remarks>
    /// The signature stays <c>Task</c>; failures are reported through
    /// <see cref="Environment.ExitCode"/> so scripts can detect them.
    /// </remarks>
    static async Task Main(string[] args)
    {
        Console.WriteLine("=== IMDb TSV Data Importer ===\n");

        // Build configuration
        var configuration = new ConfigurationBuilder()
            .SetBasePath(AppContext.BaseDirectory)
            .AddJsonFile("appsettings.json", optional: false, reloadOnChange: true)
            .Build();

        // Setup dependency injection. Disposing the provider on exit lets disposable
        // services (such as the console logger) flush and release their resources.
        await using var serviceProvider = ConfigureServices(configuration);

        // Get configuration values
        var databaseType = configuration["ImportSettings:DatabaseType"] ?? "MSSQL";
        var tsvFilePath = configuration["ImportSettings:TsvFilePath"] ?? "name.basics.tsv.gz";

        if (!TryReadInt(configuration["ImportSettings:BatchSize"], "ImportSettings:BatchSize", 1000, out var batchSize))
        {
            Environment.ExitCode = 1;
            return;
        }

        if (!TryReadInt(configuration["ImportSettings:MaxRecords"], "ImportSettings:MaxRecords", 0, out var maxRecords))
        {
            Environment.ExitCode = 1;
            return;
        }

        // Override with command line arguments if provided
        if (args.Length > 0)
        {
            tsvFilePath = args[0];
        }

        if (args.Length > 1)
        {
            databaseType = args[1];
        }

        if (args.Length > 2 && !TryReadInt(args[2], "batchSize", batchSize, out batchSize))
        {
            PrintUsage();
            Environment.ExitCode = 1;
            return;
        }

        // A non-positive batch size would silently degrade to one insert per record.
        if (batchSize < 1)
        {
            Console.WriteLine($"Error: batch size must be at least 1, but was {batchSize}.");
            PrintUsage();
            Environment.ExitCode = 1;
            return;
        }

        Console.WriteLine("Configuration:");
        Console.WriteLine($" Database Type: {databaseType}");
        Console.WriteLine($" TSV File: {tsvFilePath}");
        Console.WriteLine($" Batch Size: {batchSize}");
        Console.WriteLine($" Max Records: {(maxRecords > 0 ? maxRecords.ToString() : "Unlimited")}");
        Console.WriteLine();

        // Validate file exists
        if (!File.Exists(tsvFilePath))
        {
            Console.WriteLine($"Error: TSV file not found: {tsvFilePath}");
            PrintUsage();
            Environment.ExitCode = 1;
            return;
        }

        try
        {
            var dataSourceProvider = serviceProvider.GetRequiredService<IDataSourceProvider>();

            // Initialize the database importer
            var importer = new DatabaseImporter(dataSourceProvider,
                serviceProvider.GetRequiredService<ILogger<DatabaseImporter>>(),
                databaseType);

            // Create schema if it doesn't exist
            Console.WriteLine("Checking database schema...");
            var tableExists = await importer.TableExistsAsync();
            if (!tableExists)
            {
                Console.WriteLine("Creating database schema...");
                await importer.CreateSchemaAsync();
                Console.WriteLine("Schema created successfully!\n");
            }
            else
            {
                Console.WriteLine("Schema already exists.\n");
            }

            // Import data
            Console.WriteLine("Starting import...");

            // Stopwatch is monotonic, so the measurement is unaffected by clock adjustments.
            var stopwatch = System.Diagnostics.Stopwatch.StartNew();
            var totalImported = 0;
            var batch = new List<ImdbPerson>();
            var hasCap = maxRecords > 0;

            await foreach (var person in TsvReader.ReadPersonsAsync(tsvFilePath))
            {
                batch.Add(person);

                // Flush early once the pending batch would take the total up to the cap;
                // otherwise the import overshoots MaxRecords by up to a whole batch.
                // NOTE(review): assumes ImportBatchAsync never reports more rows than it was given.
                var capReached = hasCap && totalImported + batch.Count >= maxRecords;
                if (batch.Count >= batchSize || capReached)
                {
                    totalImported += await importer.ImportBatchAsync(batch);
                    Console.WriteLine($"Progress: {totalImported} records imported...");
                    batch.Clear();
                }

                // Stop if max records reached
                if (hasCap && totalImported >= maxRecords)
                {
                    break;
                }
            }

            // Import remaining records (a partial final batch at end of file)
            if (batch.Count > 0)
            {
                totalImported += await importer.ImportBatchAsync(batch);
            }

            stopwatch.Stop();
            var seconds = stopwatch.Elapsed.TotalSeconds;

            // Guard the division so an instantaneous run prints 0.00 instead of NaN or infinity.
            var recordsPerSecond = seconds > 0 ? totalImported / seconds : 0;

            Console.WriteLine("\n=== Import Complete ===");
            Console.WriteLine($"Total records imported: {totalImported}");
            Console.WriteLine($"Time taken: {seconds:F2} seconds");
            Console.WriteLine($"Records per second: {recordsPerSecond:F2}");
        }
        catch (Exception ex)
        {
            Console.WriteLine($"\nError during import: {ex.Message}");
            Console.WriteLine(ex.StackTrace);
            Environment.ExitCode = 1;
        }
    }

    /// <summary>
    /// Parses an integer setting, printing an error instead of throwing when it is malformed.
    /// </summary>
    /// <param name="raw">The raw text, or null when the setting is absent.</param>
    /// <param name="name">Setting name used in the error message.</param>
    /// <param name="fallback">Value used when <paramref name="raw"/> is null.</param>
    /// <param name="value">The parsed value, or <paramref name="fallback"/> when absent.</param>
    /// <returns>True when a usable value was produced; false when the text is not an integer.</returns>
    private static bool TryReadInt(string? raw, string name, int fallback, out int value)
    {
        if (raw is null)
        {
            value = fallback;
            return true;
        }

        if (int.TryParse(raw, out value))
        {
            return true;
        }

        Console.WriteLine($"Error: {name} must be an integer, but was '{raw}'.");
        return false;
    }

    /// <summary>
    /// Prints the command-line usage text.
    /// </summary>
    private static void PrintUsage()
    {
        Console.WriteLine("\nUsage: DataAccessProvider.TsvImporter [tsvFilePath] [databaseType] [batchSize]");
        Console.WriteLine(" tsvFilePath: Path to the TSV file (supports .tsv and .tsv.gz)");
        Console.WriteLine(" databaseType: MSSQL, MySQL, or Postgres (default: MSSQL)");
        Console.WriteLine(" batchSize: Number of records to insert per batch (default: 1000)");
    }

    /// <summary>
    /// Builds the service provider: logging, the configuration instance, the
    /// DataAccessProvider core, and one database provider per non-empty connection string.
    /// </summary>
    /// <param name="configuration">
    /// Configuration supplying the "Logging" section and the "MSSQL", "MySql" and
    /// "Postgres" connection strings.
    /// </param>
    /// <returns>The built provider; the caller is responsible for disposing it.</returns>
    static ServiceProvider ConfigureServices(IConfiguration configuration)
    {
        var services = new ServiceCollection();

        // Add logging
        services.AddLogging(builder =>
        {
            builder.AddConfiguration(configuration.GetSection("Logging"));
            builder.AddConsole();
        });

        // Add configuration
        services.AddSingleton(configuration);

        // Add DataAccessProvider core
        services.AddDataAccessProviderCore(configuration);

        // Read connection strings
        var mssqlConnectionString = configuration.GetConnectionString("MSSQL");
        var mysqlConnectionString = configuration.GetConnectionString("MySql");
        var postgresConnectionString = configuration.GetConnectionString("Postgres");

        // Register database providers based on what's configured
        if (!string.IsNullOrWhiteSpace(mssqlConnectionString))
        {
            services.AddDataAccessProviderMSSQL(mssqlConnectionString);
        }

        if (!string.IsNullOrWhiteSpace(mysqlConnectionString))
        {
            services.AddDataAccessProviderMySql(mysqlConnectionString);
        }

        if (!string.IsNullOrWhiteSpace(postgresConnectionString))
        {
            services.AddDataAccessProviderPostgres(postgresConnectionString);
        }

        var serviceProvider = services.BuildServiceProvider();

        // Configure providers: call each provider's Use* step only when its data source was registered above.
        if (serviceProvider.GetService<IDataSource<MSSQLSourceParams>>() is not null)
        {
            serviceProvider.UseDataAccessProviderMSSQL();
        }

        if (serviceProvider.GetService<IDataSource<MySQLSourceParams>>() is not null)
        {
            serviceProvider.UseDataAccessProviderMySql();
        }

        if (serviceProvider.GetService<IDataSource<PostgresSourceParams>>() is not null)
        {
            serviceProvider.UseDataAccessProviderPostgres();
        }

        return serviceProvider;
    }
}
Loading
Loading