diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index c503de3..9b150fb 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -8,29 +8,39 @@ on: jobs: build-and-test: - name: Build and Test - runs-on: macos-15 + name: Build and Test (Swift 6.2.1) + runs-on: macos-latest + env: + IOS_DEVICE_NAME: "iPhone 16 Pro" + IOS_OS_VERSION: "18.5" steps: - name: Checkout uses: actions/checkout@v4 - name: Select Xcode - run: sudo xcode-select -s /Applications/Xcode_16.2.app - - - name: Swift Version - run: swift --version - - - name: Build SDK - run: swift build -c release - - # - name: Run Tests - # run: swift test - - # - name: Build Example App - # run: | - # cd Examples/RealtimeExample - # xcodebuild -scheme RealtimeExample \ - # -destination 'platform=iOS Simulator,name=iPhone 15' \ - # clean build \ - # CODE_SIGNING_ALLOWED=NO + run: sudo xcode-select -s /Applications/Xcode_26.1.app + + - name: Build Hygiene (Reset Package State) + run: | + swift package reset + swift build -c debug + + - name: Build iOS App + uses: brightdigit/swift-build@v1.4.2 + with: + build-only: true + # 1. Provide the scheme name + scheme: DecartSDK + + # 2. Specify 'ios' to trigger xcodebuild (instead of 'swift build') + type: ios + deviceName: ${{ env.IOS_DEVICE_NAME }} + osVersion: ${{ env.IOS_OS_VERSION }} + download-platform: true + + # 3. Pin the Xcode toolchain the action uses (matches the Select Xcode step above) + xcode: /Applications/Xcode_26.1.app + # 4. 
Prettify the logs (optional but recommended) + use-xcbeautify: true + xcbeautify-renderer: github-actions diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index c8ee2d1..d355549 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -8,7 +8,10 @@ on: jobs: release: name: Create Release - runs-on: macos-15 + runs-on: macos-latest + env: + IOS_DEVICE_NAME: "iPhone 16 Pro" + IOS_OS_VERSION: "18.5" permissions: contents: write @@ -18,10 +21,25 @@ jobs: uses: actions/checkout@v4 - name: Select Xcode - run: sudo xcode-select -s /Applications/Xcode_16.2.app + run: sudo xcode-select -s /Applications/Xcode_26.1.app + + - name: Build Hygiene (Reset Package State) + run: | + swift package reset + swift build -c debug - name: Build Release - run: swift build -c release + uses: brightdigit/swift-build@v1.4.2 + with: + build-only: true + scheme: DecartSDK + type: ios + deviceName: ${{ env.IOS_DEVICE_NAME }} + osVersion: ${{ env.IOS_OS_VERSION }} + download-platform: true + xcode: /Applications/Xcode_26.1.app + use-xcbeautify: true + xcbeautify-renderer: github-actions - name: Extract Version id: version diff --git a/Example.xcodeproj/project.pbxproj b/Example.xcodeproj/project.pbxproj index 34ca959..ee4960c 100644 --- a/Example.xcodeproj/project.pbxproj +++ b/Example.xcodeproj/project.pbxproj @@ -7,9 +7,9 @@ objects = { /* Begin PBXBuildFile section */ + 161F07602F0E80DC001B6765 /* DecartSDK in Frameworks */ = {isa = PBXBuildFile; productRef = 161F075F2F0E80DC001B6765 /* DecartSDK */; }; 165B2B9B2ECE09E9004D848C /* Factory in Frameworks */ = {isa = PBXBuildFile; productRef = 165B2B9A2ECE09E9004D848C /* Factory */; }; 165B2B9D2ECE09E9004D848C /* FactoryKit in Frameworks */ = {isa = PBXBuildFile; productRef = 165B2B9C2ECE09E9004D848C /* FactoryKit */; }; - 16E67C3C2EB8CC9F00AF5515 /* DecartSDK in Frameworks */ = {isa = PBXBuildFile; productRef = 16E67C3B2EB8CC9F00AF5515 /* DecartSDK */; }; /* End PBXBuildFile section */ /* Begin 
PBXFileReference section */ @@ -30,8 +30,8 @@ buildActionMask = 2147483647; files = ( 165B2B9D2ECE09E9004D848C /* FactoryKit in Frameworks */, - 16E67C3C2EB8CC9F00AF5515 /* DecartSDK in Frameworks */, 165B2B9B2ECE09E9004D848C /* Factory in Frameworks */, + 161F07602F0E80DC001B6765 /* DecartSDK in Frameworks */, ); runOnlyForDeploymentPostprocessing = 0; }; @@ -76,16 +76,15 @@ buildRules = ( ); dependencies = ( - 16E67C3E2EB8D01600AF5515 /* PBXTargetDependency */, ); fileSystemSynchronizedGroups = ( 16D70B982EB8CA0C00455077 /* Example */, ); name = Example; packageProductDependencies = ( - 16E67C3B2EB8CC9F00AF5515 /* DecartSDK */, 165B2B9A2ECE09E9004D848C /* Factory */, 165B2B9C2ECE09E9004D848C /* FactoryKit */, + 161F075F2F0E80DC001B6765 /* DecartSDK */, ); productName = Example; productReference = 16D70B962EB8CA0C00455077 /* Example.app */; @@ -116,8 +115,8 @@ mainGroup = 16D70B8D2EB8CA0C00455077; minimizedProjectReferenceProxies = 1; packageReferences = ( - 16E67C3A2EB8CC9F00AF5515 /* XCLocalSwiftPackageReference "../decart-ios" */, 165B2B992ECE09E9004D848C /* XCRemoteSwiftPackageReference "Factory" */, + 161F075C2F0E80B8001B6765 /* XCLocalSwiftPackageReference "../decart-ios" */, ); preferredProjectObjectVersion = 77; productRefGroup = 16D70B972EB8CA0C00455077 /* Products */; @@ -149,13 +148,6 @@ }; /* End PBXSourcesBuildPhase section */ -/* Begin PBXTargetDependency section */ - 16E67C3E2EB8D01600AF5515 /* PBXTargetDependency */ = { - isa = PBXTargetDependency; - productRef = 16E67C3D2EB8D01600AF5515 /* DecartSDK */; - }; -/* End PBXTargetDependency section */ - /* Begin XCBuildConfiguration section */ 16D70B9F2EB8CA0D00455077 /* Debug */ = { isa = XCBuildConfiguration; @@ -217,7 +209,10 @@ ONLY_ACTIVE_ARCH = YES; SDKROOT = iphoneos; SWIFT_ACTIVE_COMPILATION_CONDITIONS = "DEBUG $(inherited)"; + SWIFT_DEFAULT_ACTOR_ISOLATION = MainActor; SWIFT_OPTIMIZATION_LEVEL = "-Onone"; + SWIFT_STRICT_CONCURRENCY = complete; + SWIFT_VERSION = 6.0; }; name = Debug; }; @@ 
-274,6 +269,9 @@ MTL_FAST_MATH = YES; SDKROOT = iphoneos; SWIFT_COMPILATION_MODE = wholemodule; + SWIFT_DEFAULT_ACTOR_ISOLATION = MainActor; + SWIFT_STRICT_CONCURRENCY = complete; + SWIFT_VERSION = 6.0; VALIDATE_PRODUCT = YES; }; name = Release; @@ -283,6 +281,7 @@ buildSettings = { ASSETCATALOG_COMPILER_APPICON_NAME = AppIcon; ASSETCATALOG_COMPILER_GLOBAL_ACCENT_COLOR_NAME = AccentColor; + CODE_SIGN_IDENTITY = "Apple Development"; CODE_SIGN_STYLE = Automatic; CURRENT_PROJECT_VERSION = 1; DEVELOPMENT_TEAM = BQ3Q4NZWWC; @@ -302,12 +301,14 @@ MARKETING_VERSION = 1.0; PRODUCT_BUNDLE_IDENTIFIER = ai.decart.Example; PRODUCT_NAME = "$(TARGET_NAME)"; + PROVISIONING_PROFILE_SPECIFIER = ""; STRING_CATALOG_GENERATE_SYMBOLS = YES; SWIFT_APPROACHABLE_CONCURRENCY = YES; SWIFT_DEFAULT_ACTOR_ISOLATION = MainActor; SWIFT_EMIT_LOC_STRINGS = YES; + SWIFT_STRICT_CONCURRENCY = complete; SWIFT_UPCOMING_FEATURE_MEMBER_IMPORT_VISIBILITY = YES; - SWIFT_VERSION = 5.0; + SWIFT_VERSION = 6.0; TARGETED_DEVICE_FAMILY = "1,2"; }; name = Debug; @@ -317,6 +318,7 @@ buildSettings = { ASSETCATALOG_COMPILER_APPICON_NAME = AppIcon; ASSETCATALOG_COMPILER_GLOBAL_ACCENT_COLOR_NAME = AccentColor; + CODE_SIGN_IDENTITY = "Apple Development"; CODE_SIGN_STYLE = Automatic; CURRENT_PROJECT_VERSION = 1; DEVELOPMENT_TEAM = BQ3Q4NZWWC; @@ -334,14 +336,16 @@ "@executable_path/Frameworks", ); MARKETING_VERSION = 1.0; - PRODUCT_BUNDLE_IDENTIFIER = ai.decart.Examplea; + PRODUCT_BUNDLE_IDENTIFIER = ai.decart.Example; PRODUCT_NAME = "$(TARGET_NAME)"; + PROVISIONING_PROFILE_SPECIFIER = ""; STRING_CATALOG_GENERATE_SYMBOLS = YES; SWIFT_APPROACHABLE_CONCURRENCY = YES; SWIFT_DEFAULT_ACTOR_ISOLATION = MainActor; SWIFT_EMIT_LOC_STRINGS = YES; + SWIFT_STRICT_CONCURRENCY = complete; SWIFT_UPCOMING_FEATURE_MEMBER_IMPORT_VISIBILITY = YES; - SWIFT_VERSION = 5.0; + SWIFT_VERSION = 6.0; TARGETED_DEVICE_FAMILY = "1,2"; }; name = Release; @@ -370,7 +374,7 @@ /* End XCConfigurationList section */ /* Begin XCLocalSwiftPackageReference 
section */ - 16E67C3A2EB8CC9F00AF5515 /* XCLocalSwiftPackageReference "../decart-ios" */ = { + 161F075C2F0E80B8001B6765 /* XCLocalSwiftPackageReference "../decart-ios" */ = { isa = XCLocalSwiftPackageReference; relativePath = "../decart-ios"; }; @@ -388,6 +392,10 @@ /* End XCRemoteSwiftPackageReference section */ /* Begin XCSwiftPackageProductDependency section */ + 161F075F2F0E80DC001B6765 /* DecartSDK */ = { + isa = XCSwiftPackageProductDependency; + productName = DecartSDK; + }; 165B2B9A2ECE09E9004D848C /* Factory */ = { isa = XCSwiftPackageProductDependency; package = 165B2B992ECE09E9004D848C /* XCRemoteSwiftPackageReference "Factory" */; @@ -398,15 +406,6 @@ package = 165B2B992ECE09E9004D848C /* XCRemoteSwiftPackageReference "Factory" */; productName = FactoryKit; }; - 16E67C3B2EB8CC9F00AF5515 /* DecartSDK */ = { - isa = XCSwiftPackageProductDependency; - productName = DecartSDK; - }; - 16E67C3D2EB8D01600AF5515 /* DecartSDK */ = { - isa = XCSwiftPackageProductDependency; - package = 16E67C3A2EB8CC9F00AF5515 /* XCLocalSwiftPackageReference "../decart-ios" */; - productName = DecartSDK; - }; /* End XCSwiftPackageProductDependency section */ }; rootObject = 16D70B8E2EB8CA0C00455077 /* Project object */; diff --git a/Example.xcodeproj/project.xcworkspace/xcshareddata/swiftpm/Package.resolved b/Example.xcodeproj/project.xcworkspace/xcshareddata/swiftpm/Package.resolved index d63e0f8..fb456d7 100644 --- a/Example.xcodeproj/project.xcworkspace/xcshareddata/swiftpm/Package.resolved +++ b/Example.xcodeproj/project.xcworkspace/xcshareddata/swiftpm/Package.resolved @@ -1,6 +1,24 @@ { - "originHash" : "99ec935609087934ec5bde71ae62e18771f6ea5ae003c7c000ef8a1cd073784c", + "originHash" : "98d4c41180155d7ad94e6d792307b58cb8c04adedbfdc05c3629362f72d2d6da", "pins" : [ + { + "identity" : "async-extensions", + "kind" : "remoteSourceControl", + "location" : "https://github.com/shareup/async-extensions.git", + "state" : { + "revision" : "3088474141debc75b78257a0db28adf734bcea0f", + 
"version" : "4.4.0" + } + }, + { + "identity" : "dispatch-timer", + "kind" : "remoteSourceControl", + "location" : "https://github.com/shareup/dispatch-timer.git", + "state" : { + "revision" : "2d8c304aa6f382a7a362cd5a814884f3930c5662", + "version" : "3.0.1" + } + }, { "identity" : "factory", "kind" : "remoteSourceControl", @@ -10,6 +28,15 @@ "version" : "2.5.3" } }, + { + "identity" : "synchronized", + "kind" : "remoteSourceControl", + "location" : "https://github.com/shareup/synchronized.git", + "state" : { + "revision" : "85653e23270ec88ae19f8d494157769487e34aed", + "version" : "4.0.1" + } + }, { "identity" : "webrtc", "kind" : "remoteSourceControl", @@ -18,6 +45,15 @@ "revision" : "86dbb5cb57e4da009b859a6245b1c10d610f215a", "version" : "140.0.0" } + }, + { + "identity" : "websocket-apple", + "kind" : "remoteSourceControl", + "location" : "https://github.com/shareup/websocket-apple.git", + "state" : { + "revision" : "a176fc4f7b9f7ad4f0a1fa245a2bbb024658a30e", + "version" : "4.1.0" + } } ], "version" : 3 diff --git a/Example.xcodeproj/xcshareddata/xcschemes/Example.xcscheme b/Example.xcodeproj/xcshareddata/xcschemes/Example.xcscheme index 0f704ee..b32be68 100644 --- a/Example.xcodeproj/xcshareddata/xcschemes/Example.xcscheme +++ b/Example.xcodeproj/xcshareddata/xcschemes/Example.xcscheme @@ -56,7 +56,12 @@ + + diff --git a/Example/Example/Config.swift b/Example/Example/Config.swift index 08c8eb8..8f5d366 100644 --- a/Example/Example/Config.swift +++ b/Example/Example/Config.swift @@ -5,11 +5,198 @@ // Created by Alon Bar-el on 19/11/2025. // +import DecartSDK import Foundation +struct PromptPreset: Identifiable, Sendable { + let id = UUID() + let label: String + let prompt: String +} + enum DecartConfig: Sendable { nonisolated static let apiKey = ProcessInfo.processInfo.environment["DECART_API_KEY"] ?? "" - static let defaultPrompt: String = ProcessInfo.processInfo.environment["DECART_DEFAULT_PROMPT"] - ?? 
"Simpsons" + static func presets(for model: RealtimeModel) -> [PromptPreset] { + switch model { + case .mirage, .mirage_v2: + return miragePresets + case .lucy_v2v_720p_rt: + return lucyEditPresets + } + } + + private static let miragePresets: [PromptPreset] = [ + PromptPreset( + label: "Pirates", + prompt: "Transform the image into Pirates of the Caribbean swashbuckling fantasy style while maintaining the same composition. Use weathered nautical browns and blues, supernatural green ghost effects and tropical Caribbean colors with golden treasure accents. Add water-damaged wooden ship textures, weathered pirate clothing with character-specific details and supernatural decay with ghostly material properties. Apply dramatic lantern and moonlight with supernatural transformation effects, reimagining the elements with historical pirate meets cursed treasure qualities while keeping the same overall arrangement." + ), + PromptPreset( + label: "Bee Hive", + prompt: "Transform the image into a bee hive style with whimsical synthetic-2D animation while maintaining the original composition. Apply a color palette of warm yellows and rich browns reflecting organic honey and wax materials. Create soft, rounded shapes and playful designs with vibrant, saturated colors evoking a lively, bustling atmosphere. Use bright, cheerful lighting simulating sunlight filtering through the hive for a sense of warmth and activity while keeping all elements in their current positions." + ), + PromptPreset( + label: "Van Gogh", + prompt: "Transform the image into Van Gogh painting style while maintaining the same composition. Use vibrant, emotionally expressive yellows, blues and greens with complementary color contrasts and pure unmixed pigments. Create swirling, directional brushwork with thick impasto application that suggests physical dimension. 
Make lighting appear to radiate from within objects with halos and auras around light sources, reimagining the elements with passionate Post-Impressionist qualities while keeping the same overall arrangement." + ), + PromptPreset( + label: "Yellow Cartoon", + prompt: "Transform the image into The Simpsons animation style while maintaining the same composition. Use the iconic yellow skin tone with bright primary colors and suburban American color schemes in bold saturation. Apply flat cartoon rendering with bold outlines and simple shading techniques. Create bright television lighting with minimal shadows, keeping all elements in their original positions." + ), + PromptPreset( + label: "Cyborgs", + prompt: "Transform the image into Terminator tech-noir style while maintaining the same composition. Use cold blue lighting for future scenes, warm human tones contrasted with metallic silver machine elements and electrical energy effects with blue-white intensity. Add hyperdetailed robotic components with exposed mechanical workings, battle-damaged cyborg elements revealing metal under flesh and post-apocalyptic environmental features. Apply harsh mechanical lighting emphasizing robotic elements with glowing red accents, reimagining the elements with technological horror qualities while keeping the same overall arrangement." + ), + PromptPreset( + label: "Manga", + prompt: "Transform the image into black and white manga illustration style while maintaining the same composition. Use pure blacks, clean whites and varied gray tones achieved through screen tones and hatching techniques. Apply crisp ink lines with varying weights, detailed texture work through stippling and cross-hatching. Create high contrast lighting with dramatic shadows and bright highlights, keeping all elements in their original positions." + ), + PromptPreset( + label: "Animation", + prompt: "Transform the image into Pixar 3D animation style while maintaining the same composition. 
Use carefully crafted color theory with warm family-friendly tones and cinematic color grading. Apply realistic 3D textures, advanced material shaders and subtle subsurface scattering effects. Create cinematic 3D lighting with realistic light behavior and atmospheric effects, keeping all elements in their original positions." + ), + PromptPreset( + label: "Golden Hour", + prompt: "Transform the image into golden hour style while maintaining the original composition. Apply warm tones with soft lighting that creates long shadows. Enhance the image with warm golden and orange hues throughout. Create a serene, tranquil atmosphere typical of sunset lighting while keeping all elements in their current positions." + ), + PromptPreset( + label: "Ghibli Inspired", + prompt: "Transform the image into Studio Ghibli style with hand-painted watercolor-like visuals while maintaining the same composition. Apply fine lines with soft shading and warm pastel colors in greens, blues and oranges with low saturation. Add delicate watercolor textures, gentle brushstrokes and subtle paper grain. Use soft diffused natural lighting reminiscent of light filtering through shoji screens, keeping all elements in their original positions." + ), + PromptPreset( + label: "War Zone", + prompt: "Transform the image into Mad Max post-apocalyptic style while maintaining the same composition. Use harsh desert oranges and yellows, desaturated dusty neutrals and night scenes illuminated by fire with high contrast. Add rust-covered vehicular modifications with practical mechanical detail, makeshift clothing and armor from salvaged materials and barren wasteland environmental elements. Apply harsh desert lighting with silhouettes against vast wasteland horizons, reimagining the elements with brutal survival qualities while keeping the same overall arrangement." 
+ ), + PromptPreset( + label: "Zombies", + prompt: "Transform the image into a synthetic-3D horror video game style featuring stylized zombies while maintaining the original composition. Apply exaggerated features with high detail and texture that highlight decayed, undead elements. Use a color palette of muted earth tones with pops of eerie greens and purples. Add dramatic lighting with high contrast and deep shadows for a suspenseful, mysterious atmosphere while keeping all elements in their current positions." + ), + PromptPreset( + label: "K-pop", + prompt: "Apply K-pop style with vibrant hyper-polished aesthetic, perfectly styled performers, coordinated fashion-forward outfits, candy-colored pastels alongside bold neons, immaculate styling with glossy skin, and flawless soft-focus lighting" + ), + PromptPreset( + label: "Mythic", + prompt: "Transform the image into God of War game style while maintaining the same composition. Use cold Nordic blues and whites, rich wooden browns and divine gold accents with blood-red highlights. Add weathered leather and metal with intricate Norse knotwork, godly artifacts with magical properties and massive environmental scale elements. Apply dramatic cinematic lighting emphasizing character moments and epic scale, reimagining the elements with a Norse mythological action aesthetic while keeping the same overall arrangement." + ), + PromptPreset( + label: "Wild West", + prompt: "Transform the image into Red Dead Redemption Western game style while maintaining the same composition. Use warm dusty earth tones with golden hour lighting, weathered browns and tans for frontier elements and natural greens for wilderness areas. Add weathered leather and denim with appropriate aging, wooden structures with detailed grain and wear patterns and natural environments with ecological detail. 
Apply dramatic lighting with stunning sunsets and atmospheric weather effects, reimagining the elements with an American frontier aesthetic while keeping the same overall arrangement." + ), + PromptPreset( + label: "Sci-fi Anime", + prompt: "Transform the image into Rick and Morty animation style while maintaining the same composition. Combine sci-fi colors with earth tones featuring portal greens, space blues and alien color schemes in vivid saturation. Add sci-fi technology textures, interdimensional effect work and adult animation detail levels. Apply dynamic sci-fi lighting with portal effects and alien illumination, keeping all elements in their original positions." + ), + PromptPreset( + label: "Classic Anime", + prompt: "Transform the image into Naruto anime style while maintaining the same composition. Use earth tones with ninja blues, forest greens and warm orange accents in moderate saturation. Combine traditional anime techniques with subtle texture work representing fabric and natural materials. Apply natural outdoor lighting mixed with mystical chakra effects, keeping all elements in their original positions." + ), + PromptPreset( + label: "Blocky", + prompt: "Transform the image into a Minecraft-inspired blocky 3D style with pixelated visual design while maintaining the original composition. Apply distinct cubic shapes throughout. Use a clean, consistent color palette dominated by stone grays, browns, and whites. Create smooth, pixel-consistent textures with low-resolution detail. Add bright, even lighting for an open, constructive atmosphere while keeping all elements in their current positions." + ), + PromptPreset( + label: "Football", + prompt: "Transform the image into American football style with powerful, armored athletic elements. Apply team-specific uniform colors with bold primary hues against green field background. Add football leather grain textures, helmet sheen details, grass-stained jersey effects, and player exertion elements. 
Use dramatic stadium lighting creating strong contrasts with harsh shadows. Maintain the subject's position while incorporating football action characteristics. Create an intense, gritty atmosphere filled with tension and explosive athleticism." + ), + PromptPreset( + label: "Picasso", + prompt: "Transform the image into Picasso Cubist style while maintaining the same composition. Use flat, bold hues of blues, reds and earth tones applied in geometric patches with strong black outlines. Create flattened surfaces with angular planes that break traditional perspective rules, revealing multiple viewpoints simultaneously. Apply non-naturalistic, symbolic lighting rather than representational, reimagining the elements with fragmented, multi-perspective Cubist qualities while keeping the same overall arrangement." + ), + PromptPreset( + label: "Super Hero", + prompt: "Transform the image into Marvel Cinematic Universe superhero style while maintaining the same composition. Use character-specific signature colors, location-specific color grading and vibrant energy effect colors with high saturation. Add practical superhero costumes with functional detailing, urban environments with appropriate destruction physics and magical/technological effects with distinctive visual signatures. Apply dynamic action lighting emphasizing heroic moments with dramatic highlights, reimagining the elements with colorful optimistic superhero qualities while keeping the same overall arrangement." + ), + PromptPreset( + label: "Neon Nostalgia", + prompt: "Transform the image into a cyberpunk anime style with neon colors and vibrant contrasts while maintaining the original composition. Apply a color palette of electric blues, pinks, and purples against a dark, rainy backdrop. Make surfaces appear reflective and wet with shimmering reflections. Create an intense, moody atmosphere with dynamic lighting to evoke an urban, futuristic aesthetic while keeping all elements in their current positions." 
+ ), + ] + + private static let lucyEditPresets: [PromptPreset] = [ + PromptPreset( + label: "Anime Character", + prompt: "Transform the person into a 2D anime character with smooth cel-shaded lines, soft pastel highlights, large expressive eyes, clean contours, even lighting, simplified textures, and a bright studio-style background for a polished anime look." + ), + PromptPreset( + label: "Knight Armor", + prompt: "Change the uniform to a full medieval knight's armor with polished steel plates, engraved trim, articulated joints, matte underpadding, subtle battle scuffs, and cool directional lighting reflecting off the metal surfaces." + ), + PromptPreset( + label: "Spooky Skeleton", + prompt: "Replace the person with a Halloween-style skeleton featuring clean ivory bones, deep sockets, subtle surface cracks, articulated joints, and soft overhead lighting creating dramatic shadows across the ribcage." + ), + PromptPreset( + label: "Leather Jacket", + prompt: "Change the jacket to a black leather biker jacket with weathered grain texture, silver zippers, reinforced seams, slightly creased sleeves, and cool diffuse lighting suggesting an overcast outdoor feel." + ), + PromptPreset( + label: "Origami", + prompt: "Replace the person with a full-body origami figure built from crisp white folded paper, sharp geometric edges, layered segments, subtle crease shadows, and clean studio lighting enhancing the sculptural form." + ), + PromptPreset( + label: "Business Casual", + prompt: "Change the outfit to a light blue buttoned shirt paired with a tailored charcoal jacket, smooth cotton texture, clean stitching, structured shoulders, and balanced indoor lighting for a polished business-casual look." + ), + PromptPreset( + label: "Summer Dress", + prompt: "Change the outfit to a light floral summer dress with thin spaghetti straps, soft flowing fabric, pastel bloom patterns, gentle folds, and warm natural lighting suggesting an outdoor summer setting." 
+ ), + PromptPreset( + label: "Lizard Person", + prompt: "Transform the person into a humanoid lizard figure with green scaled skin, subtle iridescence, angular cheek structure, elongated pupils, fine texture detail, and directional lighting emphasizing the reptilian contours." + ), + PromptPreset( + label: "Pink Shirt", + prompt: "Change the top color to bright pink with smooth fabric texture, preserved seams, soft shading along natural folds, and consistent lighting for an even saturated look." + ), + PromptPreset( + label: "Plastic Doll", + prompt: "Transform the person into a realistic fashion-doll version with smooth porcelain-like skin, glossy lips, defined lashes, polished facial symmetry, bright studio lighting, perfectly styled hair, and a fitted pink outfit with clean plastic-like highlights." + ), + PromptPreset( + label: "Sunglasses", + prompt: "Add a pair of dark tinted sunglasses resting naturally on the person's face, smooth acetate frames, subtle reflections on the lenses, accurate nose placement, and soft shadows across the cheeks." + ), + PromptPreset( + label: "Super Hero", + prompt: "Transform the person into a superhero wearing a fitted suit with bold color panels, textured fabric, sculpted contours, a flowing cape, subtle rim lighting, and dramatic cinematic shading." + ), + PromptPreset( + label: "Polar Bear", + prompt: "Replace the person with a small polar bear featuring dense white fur, rounded ears, soft muzzle, gentle expression, and cool ambient lighting highlighting the fluffy texture." + ), + PromptPreset( + label: "Alien", + prompt: "Transform the person into a realistic alien form with pale luminescent skin, smooth reflective surface tones, large glassy eyes, subtle facial ridges, elongated contours, and cinematic lighting emphasizing the otherworldly texture." 
+ ), + PromptPreset( + label: "Parrot on Shoulder", + prompt: "Add a bright green parrot perched on the person's shoulder with layered feathers, a curved beak, slight head tilt, natural talon grip, and a soft contact shadow on the clothing." + ), + PromptPreset( + label: "Icy Hair", + prompt: "Change the hair color to icy platinum blonde with a cool metallic sheen, fine reflective highlights, smooth strands, and bright soft lighting emphasizing the frosted tone." + ), + PromptPreset( + label: "Tux", + prompt: "Change the shirt to a formal tuxedo ensemble featuring a crisp white dress shirt, black satin lapels, structured fit, smooth fabric textures, and balanced indoor lighting for an elegant look." + ), + PromptPreset( + label: "Kitty", + prompt: "Add a small cat sitting gently on the person's head, soft striped fur, relaxed posture, curved tail, clear whiskers, natural grip on the hair, and a soft contact shadow for realism." + ), + PromptPreset( + label: "Super Spider", + prompt: "Transform the person into a Spider-Man–style hero wearing a red and blue textured suit, raised web patterns, fitted contours, reflective eye lenses, and dramatic city-style lighting." + ), + PromptPreset( + label: "Car Racer", + prompt: "Transform the person into a professional car racer wearing a padded racing suit with bold sponsor patches, high-contrast stitching, protective collar, and bright track-side lighting." + ), + PromptPreset( + label: "Happy Birthday", + prompt: "Add a mix of colorful helium balloons floating around the person, glossy surfaces, thin strings, soft reflections, varied sizes, and warm ambient party lighting." 
+ ), + ] } diff --git a/Example/Example/ContentView.swift b/Example/Example/ContentView.swift index 538be83..c5a0848 100644 --- a/Example/Example/ContentView.swift +++ b/Example/Example/ContentView.swift @@ -10,7 +10,7 @@ import SwiftUI struct ContentView: View { var body: some View { - NavigationView { + NavigationStack { List { Section(header: Text("Realtime")) { ForEach(RealtimeModel.allCases, id: \.self) { model in @@ -22,29 +22,21 @@ struct ContentView: View { Section(header: Text("Image Generation")) { ForEach(ImageModel.allCases, id: \.self) { model in - NavigationLink( - destination: GenerateImageView( - model: model - ) - ) { - Text("Image - \(model.rawValue)") + NavigationLink("Image - \(model.rawValue)") { + GenerateImageView(model: model) } } } Section(header: Text("Video Generation")) { ForEach(VideoModel.allCases, id: \.self) { model in - NavigationLink( - destination: GenerateVideoView( - model: model - ) - ) { - Text("Video - \(model.rawValue)") + NavigationLink("Video - \(model.rawValue)") { + GenerateVideoView(model: model) } } } } - .navigationBarTitle("Example") + .navigationTitle("Example") } } } diff --git a/Example/Example/DecartSDK/DecartConfig.swift b/Example/Example/DecartSDK/DecartClientShared.swift similarity index 90% rename from Example/Example/DecartSDK/DecartConfig.swift rename to Example/Example/DecartSDK/DecartClientShared.swift index 6102b74..439dbc1 100644 --- a/Example/Example/DecartSDK/DecartConfig.swift +++ b/Example/Example/DecartSDK/DecartClientShared.swift @@ -9,7 +9,7 @@ import DecartSDK import Factory import WebRTC -protocol RealtimeManager { +protocol RealtimeManagerProtocol { var currentPrompt: Prompt { get set } var shouldMirror: Bool { get set } @@ -18,7 +18,7 @@ protocol RealtimeManager { var localMediaStream: RealtimeMediaStream? { get } var remoteMediaStreams: RealtimeMediaStream? 
{ get } - func connect(model: RealtimeModel) async + func connect() async func switchCamera() async func cleanup() async } diff --git a/Example/Example/DecartSDK/DecartRealtimeManager.swift b/Example/Example/DecartSDK/DecartRealtimeManager.swift deleted file mode 100644 index 13dce01..0000000 --- a/Example/Example/DecartSDK/DecartRealtimeManager.swift +++ /dev/null @@ -1,159 +0,0 @@ -// -// RealtimeManager.swift -// Example -// -// Created by Alon Bar-el on 04/11/2025. -// -import Combine -import DecartSDK -import Factory -import SwiftUI -import WebRTC - -@MainActor -@Observable -final class DecartRealtimeManager: RealtimeManager { - @ObservationIgnored - private let decartClient = Container.shared.decartClient() - - var currentPrompt: Prompt { - didSet { - Task { [weak self] in - guard let self, let client = self.realtimeClient else { return } - do { - try await client.setPrompt(currentPrompt) - } catch { - DecartLogger.log( - "failed to update prompt: \(error.localizedDescription)", level: .error - ) - } - } - } - } - - var shouldMirror: Bool - - private(set) var connectionState: DecartRealtimeConnectionState = .idle - - @ObservationIgnored - private(set) var localMediaStream: RealtimeMediaStream? - @ObservationIgnored - private(set) var remoteMediaStreams: RealtimeMediaStream? - - @ObservationIgnored - private var realtimeClient: RealtimeClient? - @ObservationIgnored - private var videoCapturer: RTCCameraVideoCapturer? - @ObservationIgnored - private var eventTask: Task? - - init( - currentPrompt: Prompt, - isMirroringEnabled: Bool = true // since the initial camera is the front facing one - ) { - self.currentPrompt = currentPrompt - self.shouldMirror = isMirroringEnabled - } - - func switchCamera() async { - print("switching camera to \(shouldMirror ? 
"back" : "front") camera") - guard let videoCapturer, let realtimeClient else { - preconditionFailure("🚨 videoCapturer is nil when switching camera") - } - do { - try await RealtimeCameraCapture.switchCamera( - capturer: videoCapturer, - realtimeClient: realtimeClient, - newPosition: shouldMirror ? .back : .front - ) - shouldMirror.toggle() - } catch { - DecartLogger.log("error while switching camera!", level: .error) - } - } - - func connect(model: RealtimeModel) async { - if connectionState.isInSession || realtimeClient != nil { - await cleanup() - } - - connectionState = .connecting - - do { - realtimeClient = - try decartClient - .createRealtimeClient( - options: RealtimeConfiguration( - model: Models.realtime(model), - initialState: ModelState( - prompt: currentPrompt - ) - )) - guard let realtimeClient else { - preconditionFailure("🚨 realtimeClient is nil after creating it") - } - - monitorEvents() - - (localMediaStream, videoCapturer) = - try await RealtimeCameraCapture - .captureLocalCameraStream( - realtimeClient: realtimeClient, - cameraFacing: .front - ) - - DecartLogger.log("Connecting to WebRTC...", level: .info) - remoteMediaStreams = - try await realtimeClient - .connect(localStream: localMediaStream!) - } catch { - DecartLogger.log( - "Connection failed with error: \(error.localizedDescription)", level: .error - ) - DecartLogger.log("Error details: \(error)", level: .error) - await cleanup() - } - } - - private func monitorEvents() { - eventTask?.cancel() - - eventTask = Task { [weak self] in - guard let self, let stream = self.realtimeClient?.events else { return } - - for await state in stream { - if Task.isCancelled { return } - - DecartLogger.log("Connection state changed: \(state)", level: .info) - self.connectionState = state - - if state == .error { - DecartLogger.log("Error state received", level: .error) - // Should we disconnect on error? The connection might already be broken. 
- // Cleanup handles it if needed, or we can just stay in error state. - // For now, just updating state is enough as UI reacts to it. - } - } - } - } - - func cleanup() async { - DecartLogger.log("Starting cleanup...", level: .info) - eventTask?.cancel() - eventTask = nil - - if let capturer = videoCapturer { - await withCheckedContinuation { (k: CheckedContinuation) in - capturer.stopCapture { k.resume() } - } - } - videoCapturer = nil - await realtimeClient?.disconnect() - realtimeClient = nil - remoteMediaStreams = nil - localMediaStream = nil - connectionState = .idle - - DecartLogger.log("Cleanup complete.", level: .success) - } -} diff --git a/Example/Example/DecartSDK/ImageFetcher.swift b/Example/Example/DecartSDK/ImageFetcher.swift index f70feba..dcecf3c 100644 --- a/Example/Example/DecartSDK/ImageFetcher.swift +++ b/Example/Example/DecartSDK/ImageFetcher.swift @@ -18,28 +18,59 @@ final class ImageFetcher { @ObservationIgnored private let decartClient = Container.shared.decartClient() + @ObservationIgnored + private static let urlSession: URLSession = { + let config = URLSessionConfiguration.default + config.urlCache = nil + config.requestCachePolicy = .reloadIgnoringLocalCacheData + return URLSession(configuration: config) + }() + + private var generateImageTask: Task? + var prompt: String = "" var generatedImage: UIImage? var isProcessing: Bool = false var errorMessage: String? + func cancelGeneration() { + generateImageTask?.cancel() + generateImageTask = nil + } + func reset() { + cancelGeneration() prompt = "" generatedImage = nil errorMessage = nil isProcessing = false } - func fetchImage(model: ImageModel, inputType: ModelInputType, selectedItem: PhotosPickerItem) - async - { - let trimmedPrompt = prompt.trimmingCharacters(in: .whitespacesAndNewlines) - guard !trimmedPrompt.isEmpty else { return } + func fetchImage(model: ImageModel, inputType: ModelInputType, selectedItem: PhotosPickerItem?) 
{ + let currentPrompt = prompt + guard !currentPrompt.trimmingCharacters(in: .whitespacesAndNewlines).isEmpty else { return } + generateImageTask?.cancel() isProcessing = true errorMessage = nil generatedImage = nil + generateImageTask = Task { [weak self] in + await self?.performFetchImage( + model: model, + inputType: inputType, + selectedItem: selectedItem, + prompt: currentPrompt + ) + } + } + + private func performFetchImage( + model: ImageModel, + inputType: ModelInputType, + selectedItem: PhotosPickerItem?, + prompt: String + ) async { defer { isProcessing = false } @@ -49,40 +80,57 @@ final class ImageFetcher { switch inputType { case .textToImage: - let input = TextToImageInput(prompt: trimmedPrompt) + let input = try TextToImageInput(prompt: prompt) processClient = try decartClient.createProcessClient( model: model, - input: input + input: input, + session: Self.urlSession ) case .imageToImage: - guard let referenceData = try await selectedItem.loadTransferable(type: Data.self) + guard let selectedItem else { + throw DecartError.invalidInput("No image selected") + } + + guard !Task.isCancelled else { return } + + guard let rawData = try await selectedItem.loadTransferable(type: Data.self), + let image = UIImage(data: rawData), + let fixedImage = image.fixOrientation(), + let imageData = fixedImage.jpegData(compressionQuality: 0.9) else { - throw DecartError.invalidInput("Failed to load the selected image") + throw DecartError.invalidInput("Failed to load image data") } - let fileInput = FileInput.image( - data: referenceData, - filename: "reference.jpg" - ) - let input = ImageToImageInput(prompt: trimmedPrompt, data: fileInput) + guard !Task.isCancelled else { return } + + let fileInput = try FileInput.image(data: imageData) + let input = try ImageToImageInput(prompt: prompt, data: fileInput) processClient = try decartClient.createProcessClient( model: model, - input: input + input: input, + session: Self.urlSession ) default: throw 
DecartError.invalidInput("Unsupported input type") } + guard !Task.isCancelled else { return } + let data = try await processClient.process() + + guard !Task.isCancelled else { return } + guard let image = UIImage(data: data) else { errorMessage = "Failed to decode image data" return } generatedImage = image } catch { - errorMessage = error.localizedDescription + if !Task.isCancelled { + errorMessage = error.localizedDescription + } } } } diff --git a/Example/Example/DecartSDK/RealtimeManager.swift b/Example/Example/DecartSDK/RealtimeManager.swift new file mode 100644 index 0000000..f602fd2 --- /dev/null +++ b/Example/Example/DecartSDK/RealtimeManager.swift @@ -0,0 +1,179 @@ +// +// DecartRealtimeManager.swift +// Example +// +// Created by Alon Bar-el on 04/11/2025. +// +import Combine +import DecartSDK +import Factory +import SwiftUI +@preconcurrency import WebRTC + +@MainActor +@Observable +final class RealtimeManager: RealtimeManagerProtocol { + // MARK: - Public State + + var currentPrompt: Prompt { + didSet { + // Send updated prompt to the server for real-time style changes + realtimeManager?.setPrompt(currentPrompt) + } + } + + var shouldMirror: Bool + + private(set) var connectionState: DecartRealtimeConnectionState = .idle + private(set) var localMediaStream: RealtimeMediaStream? + private(set) var remoteMediaStreams: RealtimeMediaStream? + + // MARK: - Private + + @ObservationIgnored + private let decartClient = Container.shared.decartClient() + + @ObservationIgnored + private let model: RealtimeModel + + @ObservationIgnored + private var realtimeManager: DecartRealtimeManager? + + @ObservationIgnored + private var eventTask: Task? + + #if !targetEnvironment(simulator) + @ObservationIgnored + private var capture: RealtimeCapture? 
+ #endif + + // MARK: - Init + + init(model: RealtimeModel, currentPrompt: Prompt, isMirroringEnabled: Bool = true) { + self.model = model + self.currentPrompt = currentPrompt + self.shouldMirror = isMirroringEnabled + } + + // MARK: - Public API + + func connect() async { + if connectionState.isInSession || realtimeManager != nil { + await cleanup() + } + + connectionState = .connecting + + do { + let modelConfig = Models.realtime(model) + + // Initialize the WebRTC manager with model config and initial prompt + realtimeManager = try decartClient.createRealtimeManager( + options: RealtimeConfiguration( + model: modelConfig, + initialState: ModelState(prompt: currentPrompt) + ) + ) + + guard let realtimeManager else { + preconditionFailure("realtimeManager is nil after creation") + } + + // Listen for connection state changes (connecting, connected, error, etc.) + startEventMonitoring() + + #if !targetEnvironment(simulator) + try await startCapture(model: modelConfig) + + // Establish WebRTC connection - sends local video, receives AI-processed video + remoteMediaStreams = try await realtimeManager.connect(localStream: localMediaStream!) + #endif + } catch { + DecartLogger.log("Connection failed: \(error.localizedDescription)", level: .error) + await cleanup() + } + } + + func switchCamera() async { + #if !targetEnvironment(simulator) + guard let capture else { return } + do { + // Toggle between front and back camera + try await capture.switchCamera() + shouldMirror = capture.position == .front + } catch { + DecartLogger.log("Failed to switch camera", level: .error) + } + #endif + } + + func cleanup() async { + connectionState = .idle + + // Brief delay to allow UI to update before teardown + try? 
await Task.sleep(nanoseconds: 100_000_000) + + eventTask?.cancel() + eventTask = nil + + disableMediaTracks() + + #if !targetEnvironment(simulator) + // Release camera resources + await capture?.stopCapture() + capture = nil + #endif + + // Close WebRTC connection and release server resources + await realtimeManager?.disconnect() + realtimeManager = nil + + localMediaStream = nil + remoteMediaStreams = nil + } + + // MARK: - Private Helpers + + #if !targetEnvironment(simulator) + private func startCapture(model: ModelDefinition) async throws { + guard let realtimeManager else { return } + + // Create a video source that camera frames will be written to + let videoSource = realtimeManager.createVideoSource() + + // Initialize camera capture with model-specific settings (resolution, fps) + capture = RealtimeCapture(model: model, videoSource: videoSource) + try await capture?.startCapture() + + // Wrap the video source in a track for WebRTC transmission + let videoTrack = realtimeManager.createVideoTrack(source: videoSource, trackId: "video0") + localMediaStream = RealtimeMediaStream(videoTrack: videoTrack, id: .localStream) + } + #endif + + private func startEventMonitoring() { + eventTask?.cancel() + + eventTask = Task { [weak self] in + // Subscribe to connection state updates from the SDK + guard let self, let stream = self.realtimeManager?.events else { return } + + for await state in stream { + if Task.isCancelled { return } + if state == .error { + // Treat signaling (WS) disconnects as disconnected in the example UI. 
+ self.connectionState = .error + } else { + self.connectionState = state + } + } + } + } + + private func disableMediaTracks() { + localMediaStream?.videoTrack.isEnabled = false + localMediaStream?.audioTrack?.isEnabled = false + remoteMediaStreams?.videoTrack.isEnabled = false + remoteMediaStreams?.audioTrack?.isEnabled = false + } +} diff --git a/Example/Example/DecartSDK/UIImage+Normalized.swift b/Example/Example/DecartSDK/UIImage+Normalized.swift new file mode 100644 index 0000000..efb3272 --- /dev/null +++ b/Example/Example/DecartSDK/UIImage+Normalized.swift @@ -0,0 +1,22 @@ +// +// UIImage+Normalized.swift +// Example +// +// Created by Alon Bar-el on 23/11/2025. +// + +import UIKit + +extension UIImage { + func fixOrientation() -> UIImage? { + if imageOrientation == .up { + return self + } + + UIGraphicsBeginImageContext(size) + draw(in: CGRect(origin: .zero, size: size)) + let normalizedImage = UIGraphicsGetImageFromCurrentImageContext() + UIGraphicsEndImageContext() + return normalizedImage + } +} diff --git a/Example/Example/DecartSDK/VideoFetcher.swift b/Example/Example/DecartSDK/VideoFetcher.swift index 6c99d0d..4559196 100644 --- a/Example/Example/DecartSDK/VideoFetcher.swift +++ b/Example/Example/DecartSDK/VideoFetcher.swift @@ -15,160 +15,159 @@ import SwiftUI @MainActor @Observable final class VideoFetcher { - @ObservationIgnored - private let decartClient = Container.shared.decartClient() - - private var generateVideoTask: Task? - - var prompt: String = "" - var generatedVideoURL: URL? - var videoPlayer: AVPlayer? - var isProcessing: Bool = false - var errorMessage: String? - - func reset() { - prompt = "" - - // Delete temporary video file if it exists - if let videoURL = generatedVideoURL { - try? 
FileManager.default.removeItem(at: videoURL) - } - - generatedVideoURL = nil - errorMessage = nil - isProcessing = false - videoPlayer?.pause() - videoPlayer = nil - generateVideoTask?.cancel() - generateVideoTask = nil - } - - func cancelGeneration() { - generateVideoTask?.cancel() - generateVideoTask = nil - videoPlayer?.pause() - } - - func fetchVideo(model: VideoModel, inputType: ModelInputType, selectedItem: PhotosPickerItem?) { - let trimmedPrompt = prompt.trimmingCharacters(in: .whitespacesAndNewlines) - guard !trimmedPrompt.isEmpty else { return } - - generateVideoTask?.cancel() - isProcessing = true - errorMessage = nil - generatedVideoURL = nil - - generateVideoTask = Task { [weak self, selectedItem] in - if let videoURL = self?.generatedVideoURL { - try? FileManager.default.removeItem(at: videoURL) - } - - await self?.generateVideo( - trimmedPrompt: trimmedPrompt, - model: model, - inputType: inputType, - selectedItem: selectedItem - ) - } - } - - private func generateVideo( - trimmedPrompt: String, - model: VideoModel, - inputType: ModelInputType, - selectedItem: PhotosPickerItem? - ) async { - defer { - isProcessing = false - } - - do { - let processClient = try await buildProcessClient( - prompt: trimmedPrompt, - model: model, - inputType: inputType, - selectedItem: selectedItem - ) - guard !Task.isCancelled else { return } - - let data = try await processClient.process() - let tempURL = FileManager.default.temporaryDirectory - .appendingPathComponent(UUID().uuidString) - .appendingPathExtension("mp4") - try data.write(to: tempURL, options: .atomic) - - if Task.isCancelled { - return - } - - videoPlayer?.pause() - generatedVideoURL = tempURL - videoPlayer = AVPlayer(url: tempURL) - } catch { - if Task.isCancelled { - return - } - errorMessage = error.localizedDescription - } - } - - private func buildProcessClient( - prompt: String, - model: VideoModel, - inputType: ModelInputType, - selectedItem: PhotosPickerItem? 
- ) async throws -> ProcessClient { - switch inputType { - case .textToVideo: - let input = TextToVideoInput(prompt: prompt) - return try decartClient.createProcessClient(model: model, input: input) - - case .imageToVideo: - let fileInput = try await fileInput(from: selectedItem, requiresVideo: false) - let input = ImageToVideoInput(prompt: prompt, data: fileInput) - return try decartClient.createProcessClient(model: model, input: input) - - case .videoToVideo: - let fileInput = try await fileInput(from: selectedItem, requiresVideo: true) - let input = VideoToVideoInput(prompt: prompt, data: fileInput) - return try decartClient.createProcessClient(model: model, input: input) - - default: - throw DecartError.invalidInput("Unsupported input type") - } - } - - private func fileInput(from item: PhotosPickerItem?, requiresVideo: Bool) async throws - -> FileInput - { - guard let item else { - throw DecartError.invalidInput( - requiresVideo ? "Please attach a video first" : "Please attach an image first" - ) - } - - guard let data = try await item.loadTransferable(type: Data.self) else { - throw DecartError.invalidInput("Failed to load selected media") - } - - guard let mediaType = resolveMediaType(for: item) else { - throw DecartError.invalidInput("Unsupported media type") - } - - if requiresVideo && mediaType.conforms(to: .video) == false { - throw DecartError.invalidInput("Please attach a video file") - } - - if !requiresVideo && mediaType.conforms(to: .image) == false { - throw DecartError.invalidInput("Please attach an image file") - } - - return try FileInput.from(data: data, uniformType: mediaType) - } - - private func resolveMediaType(for item: PhotosPickerItem) -> UTType? { - item.supportedContentTypes.first(where: { - $0.conforms(to: .video) || $0.conforms(to: .image) - }) ?? 
item.supportedContentTypes.first - } + @ObservationIgnored + private let decartClient = Container.shared.decartClient() + + @ObservationIgnored + private static let urlSession: URLSession = { + let config = URLSessionConfiguration.default + config.urlCache = nil + config.requestCachePolicy = .reloadIgnoringLocalCacheData + return URLSession(configuration: config) + }() + + private var generateVideoTask: Task? + + var prompt: String = "" + var generatedVideoURL: URL? + var videoPlayer: AVPlayer? + var isProcessing: Bool = false + var errorMessage: String? + + func reset() { + prompt = "" + + if let videoURL = generatedVideoURL { + try? FileManager.default.removeItem(at: videoURL) + } + + generatedVideoURL = nil + errorMessage = nil + isProcessing = false + videoPlayer?.pause() + videoPlayer = nil + generateVideoTask?.cancel() + generateVideoTask = nil + } + + func cancelGeneration() { + generateVideoTask?.cancel() + generateVideoTask = nil + videoPlayer?.pause() + } + + func fetchVideo(model: VideoModel, inputType: ModelInputType, selectedItem: PhotosPickerItem?) { + let trimmedPrompt = prompt.trimmingCharacters(in: .whitespacesAndNewlines) + guard !trimmedPrompt.isEmpty else { return } + + generateVideoTask?.cancel() + isProcessing = true + errorMessage = nil + generatedVideoURL = nil + + generateVideoTask = Task { [weak self, selectedItem] in + if let videoURL = self?.generatedVideoURL { + try? FileManager.default.removeItem(at: videoURL) + } + + await self?.generateVideo( + trimmedPrompt: trimmedPrompt, + model: model, + inputType: inputType, + selectedItem: selectedItem + ) + } + } + + private func generateVideo( + trimmedPrompt: String, + model: VideoModel, + inputType: ModelInputType, + selectedItem: PhotosPickerItem? 
+ ) async { + defer { + isProcessing = false + } + + do { + let processClient = try await buildProcessClient( + prompt: trimmedPrompt, + model: model, + inputType: inputType, + selectedItem: selectedItem + ) + guard !Task.isCancelled else { return } + + let data = try await processClient.process() + let tempURL = FileManager.default.temporaryDirectory + .appendingPathComponent(UUID().uuidString) + .appendingPathExtension("mp4") + try data.write(to: tempURL, options: .atomic) + + if Task.isCancelled { + return + } + + videoPlayer?.pause() + generatedVideoURL = tempURL + videoPlayer = AVPlayer(url: tempURL) + } catch { + if Task.isCancelled { + return + } + errorMessage = error.localizedDescription + } + } + + private func buildProcessClient( + prompt: String, + model: VideoModel, + inputType: ModelInputType, + selectedItem: PhotosPickerItem? + ) async throws -> ProcessClient { + switch inputType { + case .textToVideo: + let input = try TextToVideoInput(prompt: prompt) + return try decartClient.createProcessClient(model: model, input: input, session: Self.urlSession) + + case .imageToVideo: + let fileInput = try await loadFileInput(from: selectedItem) + let input = try ImageToVideoInput(prompt: prompt, data: fileInput) + return try decartClient.createProcessClient(model: model, input: input, session: Self.urlSession) + + case .videoToVideo: + let fileInput = try await loadFileInput(from: selectedItem) + let input = try VideoToVideoInput(prompt: prompt, data: fileInput) + return try decartClient.createProcessClient(model: model, input: input, session: Self.urlSession) + + default: + throw DecartError.invalidInput("Unsupported input type") + } + } + + private func loadFileInput(from item: PhotosPickerItem?) 
async throws -> FileInput { + guard let item else { + throw DecartError.invalidInput("No media selected") + } + + guard var data = try await item.loadTransferable(type: Data.self) else { + throw DecartError.invalidInput("Failed to load selected media") + } + + let mediaType = item.supportedContentTypes.first(where: { + $0.conforms(to: .movie) || $0.conforms(to: .video) || $0.conforms(to: .image) + }) + + if let type = mediaType, type.conforms(to: .image) { + guard let image = UIImage(data: data), + let fixedImage = image.fixOrientation(), + let jpegData = fixedImage.jpegData(compressionQuality: 0.9) + else { + throw DecartError.invalidInput("Failed to process image") + } + data = jpegData + } + + return try FileInput.from(data: data, uniformType: mediaType) + } } diff --git a/Example/Example/Views/DraggableRTCVideoView.swift b/Example/Example/Views/DraggableRTCVideoView.swift index 3e42a3b..b509fcb 100644 --- a/Example/Example/Views/DraggableRTCVideoView.swift +++ b/Example/Example/Views/DraggableRTCVideoView.swift @@ -10,7 +10,7 @@ import SwiftUI import WebRTC struct DraggableRTCVideoView: View { - let track: RTCVideoTrack + let track: RTCVideoTrack? let mirror: Bool @State private var offset: CGSize = .zero diff --git a/Example/Example/Views/GenerateImageView.swift b/Example/Example/Views/GenerateImageView.swift index 2a10996..1f20110 100644 --- a/Example/Example/Views/GenerateImageView.swift +++ b/Example/Example/Views/GenerateImageView.swift @@ -16,6 +16,7 @@ struct GenerateImageView: View { @State private var imageFetcher = ImageFetcher() @State private var selectedItem: PhotosPickerItem? @State private var selectedImagePreview: UIImage? + @State private var previewLoadTask: Task? 
@FocusState private var promptFocused: Bool private var inputType: ModelInputType { @@ -50,6 +51,14 @@ struct GenerateImageView: View { .padding(.horizontal) .padding(.bottom) } + .onDisappear { + previewLoadTask?.cancel() + previewLoadTask = nil + imageFetcher.cancelGeneration() + imageFetcher.reset() + selectedItem = nil + selectedImagePreview = nil + } .navigationTitle(model.rawValue) .navigationBarTitleDisplayMode(.inline) } @@ -113,7 +122,7 @@ struct GenerateImageView: View { } TextField("Enter prompt…", text: $imageFetcher.prompt, axis: .vertical) - .lineLimit(1...3) + .lineLimit(1 ... 3) .textFieldStyle(.roundedBorder) .disabled(imageFetcher.isProcessing) .focused($promptFocused) @@ -137,14 +146,17 @@ struct GenerateImageView: View { .disabled(!canSend) } }.onChange(of: selectedItem) { + previewLoadTask?.cancel() guard let selectedItem else { selectedImagePreview = nil return } - Task { + previewLoadTask = Task { + guard !Task.isCancelled else { return } let imagePreview = try? await selectedItem.loadTransferable( type: Data.self ) + guard !Task.isCancelled else { return } if let uiImage = UIImage(data: imagePreview ?? 
Data()) { selectedImagePreview = uiImage } @@ -154,15 +166,19 @@ struct GenerateImageView: View { private func generate() { dismissKeyboard() - Task { - guard let selectedItem else { - return - } - await imageFetcher.fetchImage( + if requiresReference { + guard let selectedItem else { return } + imageFetcher.fetchImage( model: model, inputType: inputType, selectedItem: selectedItem ) + } else { + imageFetcher.fetchImage( + model: model, + inputType: inputType, + selectedItem: nil + ) } } diff --git a/Example/Example/Views/GenerateVideoView.swift b/Example/Example/Views/GenerateVideoView.swift index 4a390ff..f812054 100644 --- a/Example/Example/Views/GenerateVideoView.swift +++ b/Example/Example/Views/GenerateVideoView.swift @@ -16,6 +16,7 @@ struct GenerateVideoView: View { @State private var videoFetcher = VideoFetcher() @State private var selectedItem: PhotosPickerItem? @State private var selectedMediaPreview: UIImage? + @State private var previewLoadTask: Task? @FocusState private var promptFocused: Bool private var trimmedPrompt: String { @@ -65,8 +66,12 @@ struct GenerateVideoView: View { .padding(.bottom) } .onDisappear { + previewLoadTask?.cancel() + previewLoadTask = nil videoFetcher.cancelGeneration() videoFetcher.reset() + selectedItem = nil + selectedMediaPreview = nil } .navigationTitle(model.rawValue) .navigationBarTitleDisplayMode(.inline) @@ -176,12 +181,16 @@ struct GenerateVideoView: View { } private func handleSelectionChange(_ item: PhotosPickerItem?) 
{ + previewLoadTask?.cancel() + guard let item else { selectedMediaPreview = nil return } - Task { + previewLoadTask = Task { + guard !Task.isCancelled else { return } + let resolvedType = item.supportedContentTypes.first(where: { $0.conforms(to: .video) || $0.conforms(to: .image) @@ -195,6 +204,8 @@ struct GenerateVideoView: View { previewImage = image } + guard !Task.isCancelled else { return } + await MainActor.run { selectedMediaPreview = previewImage } diff --git a/Example/Example/Views/RealtimeControlsView.swift b/Example/Example/Views/RealtimeControlsView.swift new file mode 100644 index 0000000..8d9353f --- /dev/null +++ b/Example/Example/Views/RealtimeControlsView.swift @@ -0,0 +1,250 @@ +import DecartSDK +import SwiftUI + +struct RealtimeControlsView: View { + let presets: [PromptPreset] + let connectionState: DecartRealtimeConnectionState + let onPresetSelected: (PromptPreset) -> Void + let onSwitchCamera: () -> Void + let onConnectToggle: () -> Void + + @State private var selectedPresetId: UUID? 
+ + var body: some View { + VStack(spacing: 16) { + if connectionState == .error { + ErrorBanner() + } + + PresetChipsScrollView( + presets: presets, + selectedPresetId: $selectedPresetId, + onPresetSelected: { preset in + selectedPresetId = preset.id + onPresetSelected(preset) + } + ) + + ControlButtonsRow( + connectionState: connectionState, + onSwitchCamera: onSwitchCamera, + onConnectToggle: onConnectToggle + ) + } + .padding(16) + .background( + RoundedRectangle(cornerRadius: 20) + .fill(.ultraThinMaterial) + .overlay( + RoundedRectangle(cornerRadius: 20) + .stroke( + LinearGradient( + colors: [ + Color.white.opacity(0.3), + Color.white.opacity(0.1), + Color.clear, + ], + startPoint: .topLeading, + endPoint: .bottomTrailing + ), + lineWidth: 1 + ) + ) + ) + .padding(.horizontal, 8) + .padding(.bottom, 8) + .onAppear { + if selectedPresetId == nil, let firstPreset = presets.first { + selectedPresetId = firstPreset.id + } + } + } +} + +private struct ErrorBanner: View { + var body: some View { + Text("Connection error. Please try again.") + .font(.caption) + .fontWeight(.medium) + .foregroundStyle(.white) + .padding(.horizontal, 16) + .padding(.vertical, 10) + .background( + Capsule() + .fill(Color.red.opacity(0.8)) + .overlay( + Capsule() + .stroke(Color.red.opacity(0.5), lineWidth: 1) + ) + ) + } +} + +private struct PresetChipsScrollView: View { + let presets: [PromptPreset] + @Binding var selectedPresetId: UUID? 
+ let onPresetSelected: (PromptPreset) -> Void + + var body: some View { + ScrollView(.horizontal, showsIndicators: false) { + HStack(spacing: 10) { + ForEach(presets) { preset in + PresetChip( + preset: preset, + isSelected: selectedPresetId == preset.id, + onTap: { onPresetSelected(preset) } + ) + } + } + .padding(.horizontal, 4) + } + .frame(height: 44) + } +} + +private struct PresetChip: View { + let preset: PromptPreset + let isSelected: Bool + let onTap: () -> Void + + var body: some View { + Button(action: onTap) { + Text(preset.label) + .font(.subheadline) + .fontWeight(isSelected ? .semibold : .medium) + .foregroundStyle(isSelected ? .white : .white.opacity(0.8)) + .padding(.horizontal, 16) + .padding(.vertical, 10) + .background( + Capsule() + .fill( + isSelected + ? LinearGradient( + colors: [ + Color(red: 0.4, green: 0.3, blue: 1.0), + Color(red: 0.6, green: 0.2, blue: 0.9), + ], + startPoint: .topLeading, + endPoint: .bottomTrailing + ) + : LinearGradient( + colors: [ + Color.white.opacity(0.15), + Color.white.opacity(0.08), + ], + startPoint: .topLeading, + endPoint: .bottomTrailing + ) + ) + .overlay( + Capsule() + .stroke( + isSelected + ? Color.white.opacity(0.4) + : Color.white.opacity(0.2), + lineWidth: 1 + ) + ) + ) + .shadow( + color: isSelected ? 
Color(red: 0.5, green: 0.3, blue: 1.0).opacity(0.5) : .clear, + radius: 8, + y: 2 + ) + } + .buttonStyle(.plain) + .animation(.easeInOut(duration: 0.2), value: isSelected) + } +} + +private struct ControlButtonsRow: View { + let connectionState: DecartRealtimeConnectionState + let onSwitchCamera: () -> Void + let onConnectToggle: () -> Void + + var body: some View { + HStack(spacing: 12) { + CameraSwitchButton(onTap: onSwitchCamera) + + Spacer() + + ConnectButton(connectionState: connectionState, onTap: onConnectToggle) + } + } +} + +private struct CameraSwitchButton: View { + let onTap: () -> Void + + var body: some View { + Button(action: onTap) { + Image(systemName: "arrow.trianglehead.2.counterclockwise.rotate.90") + .font(.system(size: 16, weight: .medium)) + .foregroundStyle(.white.opacity(0.8)) + .frame(width: 40, height: 40) + .background( + Circle() + .fill(Color.white.opacity(0.1)) + .overlay( + Circle() + .stroke(Color.white.opacity(0.2), lineWidth: 1) + ) + ) + } + .buttonStyle(.plain) + } +} + +private struct ConnectButton: View { + let connectionState: DecartRealtimeConnectionState + let onTap: () -> Void + + private var buttonGradient: LinearGradient { + if connectionState.isInSession { + return LinearGradient( + colors: [ + Color(red: 0.9, green: 0.2, blue: 0.3), + Color(red: 0.8, green: 0.1, blue: 0.2), + ], + startPoint: .topLeading, + endPoint: .bottomTrailing + ) + } else { + return LinearGradient( + colors: [ + Color(red: 0.2, green: 0.8, blue: 0.4), + Color(red: 0.1, green: 0.7, blue: 0.3), + ], + startPoint: .topLeading, + endPoint: .bottomTrailing + ) + } + } + + private var shadowColor: Color { + connectionState.isInSession + ? 
Color.red.opacity(0.4) + : Color.green.opacity(0.4) + } + + var body: some View { + Button(action: onTap) { + Text(connectionState.rawValue) + .font(.subheadline) + .fontWeight(.semibold) + .foregroundStyle(.white) + .padding(.horizontal, 24) + .padding(.vertical, 12) + .background( + Capsule() + .fill(buttonGradient) + .overlay( + Capsule() + .stroke(Color.white.opacity(0.3), lineWidth: 1) + ) + ) + .shadow(color: shadowColor, radius: 8, y: 2) + } + .buttonStyle(.plain) + } +} diff --git a/Example/Example/Views/RealtimeView.swift b/Example/Example/Views/RealtimeView.swift index 1cfa6ff..58d00f6 100644 --- a/Example/Example/Views/RealtimeView.swift +++ b/Example/Example/Views/RealtimeView.swift @@ -1,10 +1,3 @@ -// -// RealtimeView.swift -// Example -// -// Created by Alon Bar-el on 19/11/2025. -// - import DecartSDK import Factory import SwiftUI @@ -12,26 +5,53 @@ import WebRTC struct RealtimeView: View { private let realtimeAiModel: RealtimeModel - @State private var prompt: String = DecartConfig.defaultPrompt - - @State private var realtimeManager: RealtimeManager + private let presets: [PromptPreset] + @State private var realtimeManager: RealtimeManager? init(realtimeModel: RealtimeModel) { self.realtimeAiModel = realtimeModel - _realtimeManager = State( - initialValue: DecartRealtimeManager( - currentPrompt: Prompt( - text: DecartConfig.defaultPrompt, - enrich: false + self.presets = DecartConfig.presets(for: realtimeModel) + } + + var body: some View { + ZStack { + if let manager = realtimeManager { + RealtimeContentView( + realtimeManager: manager, + presets: presets + ) + } else { + ProgressView("Loading...") + } + } + .onAppear { + if realtimeManager == nil { + let defaultPrompt = presets.first?.prompt ?? 
"" + realtimeManager = RealtimeManager( + model: realtimeAiModel, + currentPrompt: Prompt(text: defaultPrompt, enrich: false) ) - ) - ) + Task { + await realtimeManager?.connect() + } + } + } + .onDisappear { + Task { [realtimeManager] in + await realtimeManager?.cleanup() + } + realtimeManager = nil + } } +} + +private struct RealtimeContentView: View { + @Bindable var realtimeManager: RealtimeManager + let presets: [PromptPreset] var body: some View { ZStack { if realtimeManager.remoteMediaStreams != nil { - // we listen to shouldMirror here since the demo reflects the user camera. RTCMLVideoViewWrapper( track: realtimeManager.remoteMediaStreams?.videoTrack, mirror: realtimeManager.shouldMirror @@ -39,9 +59,8 @@ struct RealtimeView: View { .background(Color.black) .edgesIgnoringSafeArea(.all) } - // UI overlay + VStack(spacing: 5) { - // Top bar HStack { VStack(alignment: .center, spacing: 1) { Text(realtimeManager.connectionState.rawValue) @@ -57,100 +76,30 @@ struct RealtimeView: View { Spacer() - // Local video preview if realtimeManager.connectionState.isInSession, - realtimeManager.localMediaStream != nil + let localStream = realtimeManager.localMediaStream { DraggableRTCVideoView( - track: realtimeManager.localMediaStream!.videoTrack, + track: localStream.videoTrack, mirror: realtimeManager.shouldMirror ) } - // Controls - VStack(spacing: 12) { - if realtimeManager.connectionState == .error { - Text( - "Error while connecting to decart realtime servers, please try again later." 
- ) - .foregroundColor(.red) - .font(.caption) - .padding(8) - .background(Color.black.opacity(0.8)) - .cornerRadius(8) - } - - HStack(spacing: 12) { - TextField("Prompt", text: $prompt) - .textFieldStyle(RoundedBorderTextFieldStyle()) - // .disabled(!viewModel.isConnected) - - Button(action: { - Task { - realtimeManager.currentPrompt = Prompt(text: prompt, enrich: false) - } - }) { - Image(systemName: "paperplane.fill") - .foregroundColor(.white) - .padding(12) - .background( - realtimeManager.connectionState.isConnected - ? Color.blue : Color.gray - ) - .cornerRadius(8) + RealtimeControlsView( + presets: presets, + connectionState: realtimeManager.connectionState, + onPresetSelected: { preset in + realtimeManager.currentPrompt = Prompt(text: preset.prompt, enrich: false) + }, + onSwitchCamera: { Task { await realtimeManager.switchCamera() } }, + onConnectToggle: { + if realtimeManager.connectionState.isInSession { + Task { await realtimeManager.cleanup() } + } else { + Task { await realtimeManager.connect() } } - // .disabled(!viewModel.isConnected) } - - HStack(spacing: 12) { - Toggle("Mirror", isOn: $realtimeManager.shouldMirror) - .toggleStyle(SwitchToggleStyle(tint: .blue)) - // .disabled(!viewModel.isConnected) - Button(action: { - Task { - await realtimeManager.switchCamera() - } - }) { - Image(systemName: "arrow.trianglehead.2.counterclockwise.rotate.90") - } - Spacer() - - Button(action: { - if realtimeManager.connectionState.isInSession { - Task { - await realtimeManager.cleanup() - } - } else { - let model = self.realtimeAiModel // Capture value - Task { - await realtimeManager.connect(model: model) - } - } - }) { - Text( - realtimeManager.connectionState.rawValue - ) - .fontWeight(.semibold) - .foregroundColor(.white) - .frame(maxWidth: .infinity) - .padding() - .background( - realtimeManager.connectionState.isConnected - ? 
Color.red : Color.green - ) - .cornerRadius(12) - } - } - } - .padding() - .background(Color.black.opacity(0.8)) - .cornerRadius(16) - .padding(.all, 5) - .onDisappear { - Task { [realtimeManager] in - await realtimeManager.cleanup() - } - } + ) } } } diff --git a/Package.resolved b/Package.resolved index e5b2e01..f1d0374 100644 --- a/Package.resolved +++ b/Package.resolved @@ -1,5 +1,33 @@ { + "originHash" : "785f4c91530100bd3dbb4a3bc1b4980520c27ac047f0c5fbfa119f340d376698", "pins" : [ + { + "identity" : "async-extensions", + "kind" : "remoteSourceControl", + "location" : "https://github.com/shareup/async-extensions.git", + "state" : { + "revision" : "3088474141debc75b78257a0db28adf734bcea0f", + "version" : "4.4.0" + } + }, + { + "identity" : "dispatch-timer", + "kind" : "remoteSourceControl", + "location" : "https://github.com/shareup/dispatch-timer.git", + "state" : { + "revision" : "2d8c304aa6f382a7a362cd5a814884f3930c5662", + "version" : "3.0.1" + } + }, + { + "identity" : "synchronized", + "kind" : "remoteSourceControl", + "location" : "https://github.com/shareup/synchronized.git", + "state" : { + "revision" : "85653e23270ec88ae19f8d494157769487e34aed", + "version" : "4.0.1" + } + }, { "identity" : "webrtc", "kind" : "remoteSourceControl", @@ -8,7 +36,16 @@ "revision" : "86dbb5cb57e4da009b859a6245b1c10d610f215a", "version" : "140.0.0" } + }, + { + "identity" : "websocket-apple", + "kind" : "remoteSourceControl", + "location" : "https://github.com/shareup/websocket-apple.git", + "state" : { + "revision" : "a176fc4f7b9f7ad4f0a1fa245a2bbb024658a30e", + "version" : "4.1.0" + } } ], - "version" : 2 + "version" : 3 } diff --git a/Package.swift b/Package.swift index 83c615b..087b855 100644 --- a/Package.swift +++ b/Package.swift @@ -1,4 +1,4 @@ -// swift-tools-version: 6.0.3 +// swift-tools-version: 6.2.1 import PackageDescription let package = Package( @@ -14,13 +14,18 @@ let package = Package( ) ], dependencies: [ - .package(url: 
"https://github.com/stasel/WebRTC.git", from: "140.0.0") + .package(url: "https://github.com/stasel/WebRTC.git", from: "140.0.0"), + .package( + url: "https://github.com/shareup/websocket-apple.git", + from: "4.1.0" + ) ], targets: [ .target( name: "DecartSDK", dependencies: [ - .product(name: "WebRTC", package: "WebRTC") + .product(name: "WebRTC", package: "WebRTC"), + .product(name: "WebSocket", package: "websocket-apple") ], path: "Sources/DecartSDK" ) diff --git a/README.md b/README.md index 41b2f83..cb56d40 100644 --- a/README.md +++ b/README.md @@ -12,20 +12,20 @@ Native Swift SDK for [Decart AI](https://decart.ai) - Real-time video processing Decart iOS SDK provides two primary APIs: -- **RealtimeClient** - Real-time video processing with WebRTC streaming +- **RealtimeManager** - Real-time video processing with WebRTC streaming - **ProcessClient** - Batch image and video generation Both APIs leverage modern Swift concurrency (async/await) with type-safe interfaces and comprehensive error handling. 
## Features -- ✅ **Real-time video processing** with WebRTC -- ✅ **Batch image and video generation** -- ✅ **Native Swift** with modern concurrency (async/await) -- ✅ **AsyncStream events** for reactive state management -- ✅ **Type-safe API** with compile-time guarantees -- ✅ **iOS 15+** and **macOS 12+** support -- ✅ **SwiftUI** ready +- Real-time video processing with WebRTC +- Batch image and video generation +- Native Swift with modern concurrency (async/await) +- AsyncStream events for reactive state management +- Type-safe API with compile-time guarantees +- iOS 15+ and macOS 12+ support +- SwiftUI ready ## Installation @@ -52,34 +52,23 @@ Stream video with real-time AI processing using WebRTC: ```swift import DecartSDK -import WebRTC -// Configure SDK let config = DecartConfiguration(apiKey: "your-api-key") let client = DecartClient(decartConfiguration: config) -// Create realtime client -let model = Models.realtime(.mirage) -let realtimeClient = try client.createRealtimeClient( +let model: RealtimeModel = .mirage +let modelConfig = Models.realtime(model) + +let realtimeManager = try client.createRealtimeManager( options: RealtimeConfiguration( - model: model, + model: modelConfig, initialState: ModelState(prompt: Prompt(text: "Lego World")) ) ) -// Capture local camera stream -let (localStream, cameraCapturer) = try await RealtimeCameraCapture.captureLocalCameraStream( - realtimeClient: realtimeClient, - cameraFacing: .front -) - -// Connect and receive remote stream -let remoteStream = try await realtimeClient.connect(localStream: localStream) -remoteStream.videoTrack.add(videoRenderer) - // Listen to connection events Task { - for await state in realtimeClient.events { + for await state in realtimeManager.events { switch state { case .connected: print("Connected") @@ -93,14 +82,29 @@ Task { } } +// Create video source and camera capture +let videoSource = realtimeManager.createVideoSource() +let capture = RealtimeCapture(model: modelConfig, videoSource: 
videoSource) +try await capture.startCapture() + +// Create local stream and connect +let videoTrack = realtimeManager.createVideoTrack(source: videoSource, trackId: "video0") +let localStream = RealtimeMediaStream(videoTrack: videoTrack, id: .localStream) +let remoteStream = try await realtimeManager.connect(localStream: localStream) + // Update prompt in real-time -realtimeClient.setPrompt(Prompt(text: "Anime World")) +realtimeManager.setPrompt(Prompt(text: "Anime World")) + +// Send reference image (base64) to the realtime session +try await realtimeManager.setImageBase64( + imageBase64String, + prompt: "Use this as reference", + enhance: true +) // Cleanup -defer { - cameraCapturer.stopCapture(completionHandler: {}) - Task { await realtimeClient.disconnect() } -} +await capture.stopCapture() +await realtimeManager.disconnect() ``` ### 2. Text-to-Image Generation @@ -110,173 +114,224 @@ Generate images from text prompts: ```swift import DecartSDK -// Configure SDK let config = DecartConfiguration(apiKey: "your-api-key") let client = DecartClient(decartConfiguration: config) -// Create input -let input = TextToImageInput(prompt: "Retro robot in neon city") +let input = try TextToImageInput(prompt: "Retro robot in neon city") -// Create process client let processClient = try client.createProcessClient( model: .lucy_pro_t2i, input: input ) -// Generate image let imageData = try await processClient.process() let image = UIImage(data: imageData) ``` -### 3. Image-to-Video Generation +### 3. 
Image-to-Image Generation + +Transform images with AI: + +```swift +import DecartSDK + +let config = DecartConfiguration(apiKey: "your-api-key") +let client = DecartClient(decartConfiguration: config) + +let imageData = try Data(contentsOf: referenceImageURL) +let fileInput = try FileInput.image(data: imageData) + +let input = try ImageToImageInput(prompt: "Make it cyberpunk", data: fileInput) + +let processClient = try client.createProcessClient( + model: .lucy_pro_i2i, + input: input +) + +let resultData = try await processClient.process() +let image = UIImage(data: resultData) +``` + +### 4. Image-to-Video Generation Generate videos from reference images: ```swift import DecartSDK -import UniformTypeIdentifiers -// Configure SDK let config = DecartConfiguration(apiKey: "your-api-key") let client = DecartClient(decartConfiguration: config) -// Load reference image let imageData = try Data(contentsOf: referenceImageURL) -let fileInput = try FileInput.from(data: imageData, uniformType: .jpeg) +let fileInput = try FileInput.image(data: imageData) -// Create input -let input = ImageToVideoInput(prompt: "Make it dance", data: fileInput) +let input = try ImageToVideoInput(prompt: "Make it dance", data: fileInput) -// Create process client let processClient = try client.createProcessClient( model: .lucy_pro_i2v, input: input ) -// Generate video let videoData = try await processClient.process() try videoData.write(to: outputURL) ``` -## API Reference +### 5. 
Video-to-Video Generation + +Transform videos with AI: + +```swift +import DecartSDK + +let config = DecartConfiguration(apiKey: "your-api-key") +let client = DecartClient(decartConfiguration: config) + +let videoData = try Data(contentsOf: referenceVideoURL) +let fileInput = try FileInput.video(data: videoData) + +let input = try VideoToVideoInput(prompt: "Apply anime style", data: fileInput) + +let processClient = try client.createProcessClient( + model: .lucy_pro_v2v, + input: input +) -### Core Configuration +let resultData = try await processClient.process() +try resultData.write(to: outputURL) +``` -#### DecartConfiguration +## API Reference -Initialize the SDK with your API credentials: +### DecartConfiguration ```swift let config = DecartConfiguration( - baseURL: "https://api3.decart.ai", // Optional, defaults to api3.decart.ai + baseURL: "https://api3.decart.ai", // Optional apiKey: "your-api-key" ) ``` -#### DecartClient - -Main entry point for creating realtime and process clients: +### DecartClient ```swift let client = DecartClient(decartConfiguration: config) -``` -### RealtimeClient +// Create realtime manager +func createRealtimeManager(options: RealtimeConfiguration) throws -> RealtimeManager -Real-time video streaming with WebRTC. 
+// Create process clients +func createProcessClient(model: ImageModel, input: TextToImageInput) throws -> ProcessClient +func createProcessClient(model: ImageModel, input: ImageToImageInput) throws -> ProcessClient +func createProcessClient(model: VideoModel, input: TextToVideoInput) throws -> ProcessClient +func createProcessClient(model: VideoModel, input: ImageToVideoInput) throws -> ProcessClient +func createProcessClient(model: VideoModel, input: VideoToVideoInput) throws -> ProcessClient +``` -#### Methods +### RealtimeManager ```swift -func createRealtimeClient(options: RealtimeConfiguration) throws -> RealtimeClient func connect(localStream: RealtimeMediaStream) async throws -> RealtimeMediaStream func disconnect() async func setPrompt(_ prompt: Prompt) -``` - -#### Events +func setImageBase64(_ imageBase64: String?, prompt: String?, enhance: Bool?, timeout: TimeInterval?) async throws +func getStats() async -> RTCStatisticsReport? -```swift let events: AsyncStream - // States: .idle, .connecting, .connected, .disconnected, .error ``` -#### Available Models - -```swift -Models.realtime(.mirage) -Models.realtime(.mirage_v2) -Models.realtime(.lucy_v2v_720p_rt) -``` - ### ProcessClient -Batch image and video generation. 
- -#### Methods - ```swift -func createProcessClient(model: ImageModel, input: TextToImageInput) throws -> ProcessClient -func createProcessClient(model: ImageModel, input: ImageToImageInput) throws -> ProcessClient -func createProcessClient(model: VideoModel, input: TextToVideoInput) throws -> ProcessClient -func createProcessClient(model: VideoModel, input: ImageToVideoInput) throws -> ProcessClient -func createProcessClient(model: VideoModel, input: VideoToVideoInput) throws -> ProcessClient - func process() async throws -> Data ``` -#### Available Models +### Available Models + +**Realtime Models:** +- `RealtimeModel.mirage` +- `RealtimeModel.mirage_v2` +- `RealtimeModel.lucy_v2v_720p_rt` **Image Models:** -- `.lucy_pro_t2i` - Text to image -- `.lucy_pro_i2i` - Image to image +- `ImageModel.lucy_pro_t2i` - Text to image +- `ImageModel.lucy_pro_i2i` - Image to image **Video Models:** -- `.lucy_pro_t2v` - Text to video -- `.lucy_pro_i2v` - Image to video -- `.lucy_pro_v2v` - Video to video -- `.lucy_dev_i2v` - Image to video (dev) -- `.lucy_dev_v2v` - Video to video (dev) +- `VideoModel.lucy_pro_t2v` - Text to video +- `VideoModel.lucy_pro_i2v` - Image to video +- `VideoModel.lucy_pro_v2v` - Video to video +- `VideoModel.lucy_dev_i2v` - Image to video (dev) +- `VideoModel.lucy_fast_v2v` - Fast video to video ### Input Types ```swift // Text-based inputs -TextToImageInput(prompt: String, seed: Int? = nil, resolution: ProResolution? = .res720p) -TextToVideoInput(prompt: String, seed: Int? = nil, resolution: ProResolution? = .res720p) +TextToImageInput(prompt: String, seed: Int?, resolution: ProResolution?) +TextToVideoInput(prompt: String, seed: Int?, resolution: ProResolution?) // File-based inputs -ImageToImageInput(prompt: String, data: FileInput, seed: Int? = nil) -ImageToVideoInput(prompt: String, data: FileInput, seed: Int? = nil) -VideoToVideoInput(prompt: String, data: FileInput, seed: Int? 
= nil) +ImageToImageInput(prompt: String, data: FileInput, seed: Int?) +ImageToVideoInput(prompt: String, data: FileInput, seed: Int?) +VideoToVideoInput(prompt: String, data: FileInput, seed: Int?) // File input helpers -FileInput.image(data: Data, filename: String = "image.jpg") -FileInput.video(data: Data, filename: String = "video.mp4") +FileInput.image(data: Data, filename: String) +FileInput.video(data: Data, filename: String) FileInput.from(data: Data, uniformType: UTType?) ``` +### RealtimeConfiguration + +```swift +RealtimeConfiguration( + model: ModelDefinition, + initialState: ModelState, + connection: ConnectionConfig, // Optional + media: MediaConfig // Optional +) + +// Connection config +ConnectionConfig( + iceServers: [String], + connectionTimeout: Int32, + pingInterval: Int32 +) + +// Media config +MediaConfig( + video: VideoConfig +) + +// Video config +VideoConfig( + maxBitrate: Int, + minBitrate: Int, + maxFramerate: Int, + preferredCodec: String // "VP8" or "H264" +) +``` + ## Requirements - iOS 15.0+ / macOS 12.0+ - Swift 5.9+ - Xcode 15.0+ -## Architecture +## Environment Variables -The SDK follows Swift best practices: +Configure these in your Xcode scheme (Edit Scheme → Run → Environment Variables): -- **Value types** (structs) for configuration and data models -- **Reference types** (classes) for connection management -- **AsyncStream** for reactive event streams -- **async/await** for asynchronous operations -- **Structured concurrency** with Task-based cancellation -- **Type-safe protocols** for proper Swift error handling +| Variable | Required | Description | +|----------|----------|-------------| +| `DECART_API_KEY` | Yes | Your Decart API key from [platform.decart.ai](https://platform.decart.ai) | +| `DECART_DEFAULT_PROMPT` | No | Default prompt for realtime sessions (defaults to "Simpsons") | +| `ENABLE_DECART_SDK_DUBUG_LOGS` | No | Set to `YES` to enable verbose SDK logging | ## Dependencies -- 
[WebRTC](https://github.com/stasel/WebRTC) - WebRTC framework for iOS/macOS +- [WebRTC](https://github.com/nickkjordan/WebRTC) - WebRTC framework for iOS/macOS ## License diff --git a/Sources/DecartSDK/DecartSDK.swift b/Sources/DecartSDK/API/DecartClient.swift similarity index 64% rename from Sources/DecartSDK/DecartSDK.swift rename to Sources/DecartSDK/API/DecartClient.swift index b4ead5a..c202c6c 100644 --- a/Sources/DecartSDK/DecartSDK.swift +++ b/Sources/DecartSDK/API/DecartClient.swift @@ -1,35 +1,5 @@ import Foundation -public struct DecartConfiguration { - public let baseURL: URL - public let apiKey: String - - var headers: [String: String] { ["Authorization": "Bearer \(apiKey)"] } - - var signalingServerUrl: String { - var baseURLString = baseURL.absoluteString - if baseURLString.hasPrefix("https://") { - baseURLString = baseURLString.replacingOccurrences(of: "https://", with: "wss://") - } else if baseURLString.hasPrefix("http://") { - baseURLString = baseURLString.replacingOccurrences(of: "http://", with: "ws://") - } - return baseURLString - } - - public init(baseURL: String = "https://api3.decart.ai", apiKey: String) { - guard let url = URL(string: baseURL) else { - DecartLogger.log("Unable to create URL from: \(baseURL)", level: .error) - fatalError("Unable to create URL from: \(baseURL)") - } - guard !apiKey.isEmpty else { - DecartLogger.log("API key is empty", level: .error) - fatalError("Api key is empty") - } - self.baseURL = url - self.apiKey = apiKey - } -} - public struct DecartClient { let decartConfiguration: DecartConfiguration @@ -37,7 +7,7 @@ public struct DecartClient { self.decartConfiguration = decartConfiguration } - public func createRealtimeClient(options: RealtimeConfiguration) throws -> RealtimeClient { + public func createRealtimeManager(options: RealtimeConfiguration) throws -> DecartRealtimeManager { let urlString = 
"\(decartConfiguration.signalingServerUrl)\(options.model.urlPath)?api_key=\(decartConfiguration.apiKey)&model=\(options.model.name)" @@ -46,7 +16,7 @@ public struct DecartClient { throw DecartError.invalidBaseURL(urlString) } - return try RealtimeClient( + return DecartRealtimeManager( signalingServerURL: signalingServerURL, options: options ) diff --git a/Sources/DecartSDK/API/DecartConfiguration.swift b/Sources/DecartSDK/API/DecartConfiguration.swift new file mode 100644 index 0000000..72847cc --- /dev/null +++ b/Sources/DecartSDK/API/DecartConfiguration.swift @@ -0,0 +1,31 @@ +import Foundation + +public struct DecartConfiguration { + public let baseURL: URL + public let apiKey: String + + var headers: [String: String] { ["Authorization": "Bearer \(apiKey)"] } + + var signalingServerUrl: String { + var baseURLString = baseURL.absoluteString + if baseURLString.hasPrefix("https://") { + baseURLString = baseURLString.replacingOccurrences(of: "https://", with: "wss://") + } else if baseURLString.hasPrefix("http://") { + baseURLString = baseURLString.replacingOccurrences(of: "http://", with: "ws://") + } + return baseURLString + } + + public init(baseURL: String = "https://api.decart.ai", apiKey: String) { + guard let url = URL(string: baseURL) else { + DecartLogger.log("Unable to create URL from: \(baseURL)", level: .error) + fatalError("Unable to create URL from: \(baseURL)") + } + guard !apiKey.isEmpty else { + DecartLogger.log("API key is empty", level: .error) + fatalError("Api key is empty") + } + self.baseURL = url + self.apiKey = apiKey + } +} diff --git a/Sources/DecartSDK/Capture/CaptureDataTypes.swift b/Sources/DecartSDK/Capture/CameraError.swift similarity index 89% rename from Sources/DecartSDK/Capture/CaptureDataTypes.swift rename to Sources/DecartSDK/Capture/CameraError.swift index 9264a73..de96537 100644 --- a/Sources/DecartSDK/Capture/CaptureDataTypes.swift +++ b/Sources/DecartSDK/Capture/CameraError.swift @@ -7,6 +7,7 @@ enum CameraError: Error { 
case simulatorUnsupported + case noCameraDeviceAvailable case noFrontCameraDetected case noBackCameraDetected case noSupportedFormatFound @@ -16,6 +17,7 @@ enum CameraError: Error { var errorDescription: String? { switch self { case .simulatorUnsupported: return "Camera is not available on the simulator." + case .noCameraDeviceAvailable: return "No camera device is available." case .noFrontCameraDetected: return "No front camera detected." case .noSupportedFormatFound: return "No supported camera format found for the requested resolution." case .noSuitableFPSRange: return "No suitable FPS range available for the requested FPS." diff --git a/Sources/DecartSDK/Capture/CaptureExtensions.swift b/Sources/DecartSDK/Capture/CaptureExtensions.swift index caf3a6f..96ae3e1 100644 --- a/Sources/DecartSDK/Capture/CaptureExtensions.swift +++ b/Sources/DecartSDK/Capture/CaptureExtensions.swift @@ -8,13 +8,25 @@ import AVFoundation import WebRTC public extension AVCaptureDevice { - /// Pick a format that meets (or exceeds) the requested dimensions; falls back to the first available. + static func availableCameras() -> [AVCaptureDevice] { + RTCCameraVideoCapturer.captureDevices().sorted { + let nameCompare = $0.localizedName.localizedStandardCompare($1.localizedName) + if nameCompare != .orderedSame { + return nameCompare == .orderedAscending + } + return $0.uniqueID < $1.uniqueID + } + } + + /// Pick a format that meets (or exceeds) the requested dimensions in either orientation. 
func pickFormat(minWidth: Int, minHeight: Int) throws -> AVCaptureDevice.Format { let formats = RTCCameraVideoCapturer.supportedFormats(for: self) if let match = formats.first(where: { let d = CMVideoFormatDescriptionGetDimensions($0.formatDescription) - return d.width >= minWidth && d.height >= minHeight + let landscape = d.width >= minWidth && d.height >= minHeight + let portrait = d.height >= minWidth && d.width >= minHeight + return landscape || portrait }) { return match } @@ -40,11 +52,45 @@ public extension AVCaptureDevice { throw CameraError.noSuitableFPSRange } - static func pickCamera(position: AVCaptureDevice.Position) throws -> AVCaptureDevice { - let devices = RTCCameraVideoCapturer.captureDevices() - guard let front = devices.first(where: { $0.position == position }) else { + static func pickCamera( + position: AVCaptureDevice.Position, + fallbackToAny: Bool = false + ) throws -> AVCaptureDevice { + let devices = availableCameras() + guard !devices.isEmpty else { + throw CameraError.noCameraDeviceAvailable + } + + if let matchingDevice = devices.first(where: { $0.position == position }) { + return matchingDevice + } + + if fallbackToAny, let firstDevice = devices.first { + return firstDevice + } + + switch position { + case .front: throw CameraError.noFrontCameraDetected + case .back: + throw CameraError.noBackCameraDetected + default: + throw CameraError.noCameraDeviceAvailable } - return front + } + + static func nextCamera(after currentDeviceID: String?) -> AVCaptureDevice? 
{ + let devices = availableCameras() + guard !devices.isEmpty else { return nil } + + guard + let currentDeviceID, + let currentIndex = devices.firstIndex(where: { $0.uniqueID == currentDeviceID }) + else { + return devices.first + } + + let nextIndex = (currentIndex + 1) % devices.count + return devices[nextIndex] } } diff --git a/Sources/DecartSDK/Capture/RealtimeCameraCapture.swift b/Sources/DecartSDK/Capture/RealtimeCameraCapture.swift deleted file mode 100644 index 038282e..0000000 --- a/Sources/DecartSDK/Capture/RealtimeCameraCapture.swift +++ /dev/null @@ -1,81 +0,0 @@ -// -// CaptureUtils.swift -// DecartSDK -// -// Created by Alon Bar-el on 05/11/2025. -// -import AVFoundation -import WebRTC - -#if !targetEnvironment(simulator) -public enum RealtimeCameraCapture { - public static func captureLocalCameraStream(realtimeClient: RealtimeClient, cameraFacing: AVCaptureDevice.Position) async throws -> ( - RealtimeMediaStream, - RTCCameraVideoCapturer - ) { - let currentRealtimeModel = realtimeClient.options.model - // 1) Source & capturer - let videoSource = realtimeClient.createVideoSource() - let capturer = RTCCameraVideoCapturer(delegate: videoSource) - - let device = try AVCaptureDevice.pickCamera(position: cameraFacing) - let format = try device.pickFormat( - minWidth: currentRealtimeModel.width, - minHeight: currentRealtimeModel.height - ) - let targetFPS = try device.pickFPS(for: format, preferred: currentRealtimeModel.fps) - - // 3) Start capture - try await startCameraCapture(capturer: capturer, device: device, format: format, fps: targetFPS) - let localVideoTrack = realtimeClient.createVideoTrack( - source: videoSource, - trackId: "video0" - ) - // 4) Create track & stream - return ( - RealtimeMediaStream(videoTrack: localVideoTrack, id: .localStream), - capturer - ) - } - - private static func startCameraCapture( - capturer: RTCCameraVideoCapturer, - device: AVCaptureDevice, - format: AVCaptureDevice.Format, - fps: Int - ) async throws { - try await 
withCheckedThrowingContinuation { (cont: CheckedContinuation) in - capturer.startCapture(with: device, format: format, fps: fps) { error in - if let error { cont.resume(throwing: error) } - else { cont.resume() } - } - } - } - - @discardableResult - public static func switchCamera( - capturer: RTCCameraVideoCapturer, - realtimeClient: RealtimeClient, - newPosition: AVCaptureDevice.Position - ) async throws -> AVCaptureDevice.Position { - let currentRealtimeModel = realtimeClient.options.model - let newPosition: AVCaptureDevice.Position = newPosition - - let newDevice = try AVCaptureDevice.pickCamera(position: newPosition) - let format = try newDevice.pickFormat( - minWidth: currentRealtimeModel.width, - minHeight: currentRealtimeModel.height - ) - let targetFPS = try newDevice.pickFPS(for: format, preferred: currentRealtimeModel.fps) - - try await startCameraCapture( - capturer: capturer, - device: newDevice, - format: format, - fps: targetFPS - ) - - return newPosition - } -} -#endif diff --git a/Sources/DecartSDK/Capture/RealtimeCapture.swift b/Sources/DecartSDK/Capture/RealtimeCapture.swift new file mode 100644 index 0000000..5e477aa --- /dev/null +++ b/Sources/DecartSDK/Capture/RealtimeCapture.swift @@ -0,0 +1,124 @@ +import AVFoundation +@preconcurrency import WebRTC + +public enum CaptureOrientation: Sendable { + case portrait + case landscape +} + +#if !targetEnvironment(simulator) +public final class RealtimeCapture: @unchecked Sendable { + public private(set) var position: AVCaptureDevice.Position + public let orientation: CaptureOrientation + public let targetWidth: Int + public let targetHeight: Int + + public var captureSession: AVCaptureSession { capturer.captureSession } + + private let model: ModelDefinition + private let videoSource: RTCVideoSource + private let capturer: RTCCameraVideoCapturer + private var activeDeviceID: String? 
+ + public init( + model: ModelDefinition, + videoSource: RTCVideoSource, + orientation: CaptureOrientation = .portrait, + initialPosition: AVCaptureDevice.Position = .front + ) { + self.model = model + self.videoSource = videoSource + self.orientation = orientation + self.position = initialPosition + + switch orientation { + case .landscape: + self.targetWidth = model.width + self.targetHeight = model.height + case .portrait: + self.targetWidth = model.height + self.targetHeight = model.width + } + + self.capturer = RTCCameraVideoCapturer(delegate: videoSource) + } + + public func startCapture() async throws { + #if os(macOS) + try await startCapture(position: position, fallbackToAny: true) + #else + try await startCapture(position: position, fallbackToAny: false) + #endif + } + + public func switchCamera() async throws { + #if os(macOS) + let devices = AVCaptureDevice.availableCameras() + guard devices.count > 1 else { return } + + let currentDeviceID: String + if let activeDeviceID { + currentDeviceID = activeDeviceID + } else { + currentDeviceID = try AVCaptureDevice.pickCamera( + position: position, + fallbackToAny: true + ).uniqueID + } + guard let nextDevice = AVCaptureDevice.nextCamera(after: currentDeviceID) else { + throw CameraError.noCameraDeviceAvailable + } + + guard nextDevice.uniqueID != currentDeviceID else { return } + try await startCapture(with: nextDevice) + position = nextDevice.position + #else + let newPosition: AVCaptureDevice.Position = position == .front ? 
.back : .front + try await startCapture(position: newPosition, fallbackToAny: false) + position = newPosition + #endif + } + + public func stopCapture() async { + await withCheckedContinuation { (continuation: CheckedContinuation) in + capturer.stopCapture { continuation.resume() } + } + + let session = capturer.captureSession + session.beginConfiguration() + session.outputs.forEach { session.removeOutput($0) } + session.inputs.forEach { session.removeInput($0) } + session.commitConfiguration() + activeDeviceID = nil + } + + private func startCapture( + position: AVCaptureDevice.Position, + fallbackToAny: Bool + ) async throws { + let device = try AVCaptureDevice.pickCamera(position: position, fallbackToAny: fallbackToAny) + try await startCapture(with: device) + self.position = device.position + } + + private func startCapture(with device: AVCaptureDevice) async throws { + let format = try device.pickFormat(minWidth: targetWidth, minHeight: targetHeight) + let targetFPS = try device.pickFPS(for: format, preferred: model.fps) + + videoSource.adaptOutputFormat( + toWidth: Int32(targetWidth), + height: Int32(targetHeight), + fps: Int32(targetFPS) + ) + + try await withCheckedThrowingContinuation { (continuation: CheckedContinuation) in + capturer.startCapture(with: device, format: format, fps: targetFPS) { error in + if let error { continuation.resume(throwing: error) } + else { continuation.resume() } + } + } + + activeDeviceID = device.uniqueID + } +} +#endif diff --git a/Sources/DecartSDK/Models/Inputs/InputSupport.swift b/Sources/DecartSDK/Models/Inputs/InputSupport.swift new file mode 100644 index 0000000..b2eea6f --- /dev/null +++ b/Sources/DecartSDK/Models/Inputs/InputSupport.swift @@ -0,0 +1,123 @@ +import Foundation +import UniformTypeIdentifiers + +public enum ProResolution: String, Codable, Sendable { + case res720p = "720p" + case res480p = "480p" +} + +public enum DevResolution: String, Codable, Sendable { + case res720p = "720p" +} + +public enum 
InputValidationError: LocalizedError { + case emptyPrompt + case emptyFileData + case expectedImage + case expectedVideo + case unsupportedMediaType + + public var errorDescription: String? { + switch self { + case .emptyPrompt: + return "Prompt cannot be empty" + case .emptyFileData: + return "File data cannot be empty" + case .expectedImage: + return "Expected an image file" + case .expectedVideo: + return "Expected a video file" + case .unsupportedMediaType: + return "Unsupported media type. Only image and video files are supported" + } + } +} + +public enum MediaType: Sendable { + case image + case video +} + +public struct FileInput: Codable, Sendable { + public let data: Data + public let filename: String + public let mediaType: MediaType + + private enum CodingKeys: String, CodingKey { + case data, filename + } + + public init(from decoder: Decoder) throws { + let container = try decoder.container(keyedBy: CodingKeys.self) + self.data = try container.decode(Data.self, forKey: .data) + self.filename = try container.decode(String.self, forKey: .filename) + self.mediaType = FileInput.inferMediaType(from: filename) + } + + public func encode(to encoder: Encoder) throws { + var container = encoder.container(keyedBy: CodingKeys.self) + try container.encode(data, forKey: .data) + try container.encode(filename, forKey: .filename) + } + + private init(data: Data, filename: String, mediaType: MediaType) { + self.data = data + self.filename = filename + self.mediaType = mediaType + } + + public static func image(data: Data, filename: String = "image.jpg") throws -> FileInput { + guard !data.isEmpty else { throw InputValidationError.emptyFileData } + return FileInput( + data: data, + filename: ensureExtension(for: filename, defaultExtension: "jpg"), + mediaType: .image + ) + } + + public static func video(data: Data, filename: String = "video.mp4") throws -> FileInput { + guard !data.isEmpty else { throw InputValidationError.emptyFileData } + return FileInput( + data: 
data, + filename: ensureExtension(for: filename, defaultExtension: "mp4"), + mediaType: .video + ) + } + + public static func from(data: Data, uniformType: UTType?) throws -> FileInput { + guard !data.isEmpty else { throw InputValidationError.emptyFileData } + + if let type = uniformType, type.conforms(to: .image) { + return try image(data: data) + } + + if let type = uniformType, type.conforms(to: .video) || type.conforms(to: .movie) { + return try video(data: data) + } + + throw InputValidationError.unsupportedMediaType + } + + private static func ensureExtension(for filename: String, defaultExtension: String) -> String { + var trimmed = (filename as NSString).lastPathComponent + if trimmed.isEmpty { + trimmed = "attachment.\(defaultExtension)" + } + + if (trimmed as NSString).pathExtension.isEmpty { + trimmed.append(".\(defaultExtension)") + } + + return trimmed + } + + private static func inferMediaType(from filename: String) -> MediaType { + let ext = (filename as NSString).pathExtension.lowercased() + switch ext { + case "jpg", "jpeg", "png", "heic", "webp": + return .image + default: + return .video + } + } +} diff --git a/Sources/DecartSDK/Models/Inputs/ModelInputs.swift b/Sources/DecartSDK/Models/Inputs/ModelInputs.swift new file mode 100644 index 0000000..a293674 --- /dev/null +++ b/Sources/DecartSDK/Models/Inputs/ModelInputs.swift @@ -0,0 +1,123 @@ +import Foundation + +public struct TextToVideoInput: Codable, Sendable { + public let prompt: String + public let seed: Int? + public let resolution: ProResolution? + public let orientation: String? + + public init( + prompt: String, + seed: Int? = nil, + resolution: ProResolution? = .res720p, + orientation: String? 
= nil + ) throws { + let trimmed = prompt.trimmingCharacters(in: .whitespacesAndNewlines) + guard !trimmed.isEmpty else { throw InputValidationError.emptyPrompt } + + self.prompt = trimmed + self.seed = seed + self.resolution = resolution + self.orientation = orientation + } +} + +public struct TextToImageInput: Codable, Sendable { + public let prompt: String + public let seed: Int? + public let resolution: ProResolution? + public let orientation: String? + + public init( + prompt: String, + seed: Int? = nil, + resolution: ProResolution? = .res720p, + orientation: String? = nil + ) throws { + let trimmed = prompt.trimmingCharacters(in: .whitespacesAndNewlines) + guard !trimmed.isEmpty else { throw InputValidationError.emptyPrompt } + + self.prompt = trimmed + self.seed = seed + self.resolution = resolution + self.orientation = orientation + } +} + +public struct ImageToVideoInput: Codable, Sendable { + public let prompt: String + public let data: FileInput + public let seed: Int? + public let resolution: ProResolution? + + public init( + prompt: String, + data: FileInput, + seed: Int? = nil, + resolution: ProResolution? = .res720p + ) throws { + let trimmed = prompt.trimmingCharacters(in: .whitespacesAndNewlines) + guard !trimmed.isEmpty else { throw InputValidationError.emptyPrompt } + guard data.mediaType == .image else { throw InputValidationError.expectedImage } + + self.prompt = trimmed + self.data = data + self.seed = seed + self.resolution = resolution + } +} + +public struct ImageToImageInput: Codable, Sendable { + public let prompt: String + public let data: FileInput + public let seed: Int? + public let resolution: ProResolution? + public let enhancePrompt: Bool? + + public init( + prompt: String, + data: FileInput, + seed: Int? = nil, + resolution: ProResolution? = .res720p, + enhancePrompt: Bool? 
= nil + ) throws { + let trimmed = prompt.trimmingCharacters(in: .whitespacesAndNewlines) + guard !trimmed.isEmpty else { throw InputValidationError.emptyPrompt } + guard data.mediaType == .image else { throw InputValidationError.expectedImage } + + self.prompt = trimmed + self.data = data + self.seed = seed + self.resolution = resolution + self.enhancePrompt = enhancePrompt + } +} + +public struct VideoToVideoInput: Codable, Sendable { + public let prompt: String + public let data: FileInput + public let seed: Int? + public let resolution: ProResolution? + public let enhancePrompt: Bool? + public let numInferenceSteps: Int? + + public init( + prompt: String, + data: FileInput, + seed: Int? = nil, + resolution: ProResolution? = .res720p, + enhancePrompt: Bool? = nil, + numInferenceSteps: Int? = nil + ) throws { + let trimmed = prompt.trimmingCharacters(in: .whitespacesAndNewlines) + guard !trimmed.isEmpty else { throw InputValidationError.emptyPrompt } + guard data.mediaType == .video else { throw InputValidationError.expectedVideo } + + self.prompt = trimmed + self.data = data + self.seed = seed + self.resolution = resolution + self.enhancePrompt = enhancePrompt + self.numInferenceSteps = numInferenceSteps + } +} diff --git a/Sources/DecartSDK/Models/Inputs/ModelsInputFactory.swift b/Sources/DecartSDK/Models/Inputs/ModelsInputFactory.swift new file mode 100644 index 0000000..3702379 --- /dev/null +++ b/Sources/DecartSDK/Models/Inputs/ModelsInputFactory.swift @@ -0,0 +1,29 @@ +public enum ModelInputType: Sendable { + case textToVideo + case textToImage + case imageToVideo + case imageToImage + case videoToVideo +} + +public enum ModelsInputFactory: Sendable { + public static func videoInputType(for model: VideoModel) -> ModelInputType { + switch model { + case .lucy_pro_t2v: + return .textToVideo + case .lucy_dev_i2v, .lucy_pro_i2v: + return .imageToVideo + case .lucy_fast_v2v, .lucy_pro_v2v: + return .videoToVideo + } + } + + public static func imageInputType(for 
model: ImageModel) -> ModelInputType { + switch model { + case .lucy_pro_t2i: + return .textToImage + case .lucy_pro_i2i: + return .imageToImage + } + } +} diff --git a/Sources/DecartSDK/Models/ModelDataTypes.swift b/Sources/DecartSDK/Models/ModelDataTypes.swift index 25daecb..02c3ee9 100644 --- a/Sources/DecartSDK/Models/ModelDataTypes.swift +++ b/Sources/DecartSDK/Models/ModelDataTypes.swift @@ -11,12 +11,14 @@ public struct ModelDefinition: Sendable { public let fps: Int public let width: Int public let height: Int + public let hasReferenceImage: Bool - public init(name: String, urlPath: String, fps: Int, width: Int, height: Int) { + public init(name: String, urlPath: String, fps: Int, width: Int, height: Int, hasReferenceImage: Bool = false) { self.name = name self.urlPath = urlPath self.fps = fps self.width = width self.height = height + self.hasReferenceImage = hasReferenceImage } } diff --git a/Sources/DecartSDK/Models/ModelState.swift b/Sources/DecartSDK/Models/ModelState.swift deleted file mode 100644 index 6dd3f91..0000000 --- a/Sources/DecartSDK/Models/ModelState.swift +++ /dev/null @@ -1,19 +0,0 @@ -import Foundation - -public struct Prompt: Sendable { - public let text: String - public let enrich: Bool - - public init(text: String, enrich: Bool = true) { - self.text = text - self.enrich = enrich - } -} - -public struct ModelState: Sendable { - public let prompt: Prompt - - public init(prompt: Prompt) { - self.prompt = prompt - } -} diff --git a/Sources/DecartSDK/Models/Models.swift b/Sources/DecartSDK/Models/Models.swift index af74347..22ad866 100644 --- a/Sources/DecartSDK/Models/Models.swift +++ b/Sources/DecartSDK/Models/Models.swift @@ -9,6 +9,7 @@ public enum RealtimeModel: String, CaseIterable { case mirage case mirage_v2 case lucy_v2v_720p_rt + case lucy_v2v_14b_rt } public enum ImageModel: String, CaseIterable { @@ -18,7 +19,7 @@ public enum ImageModel: String, CaseIterable { public enum VideoModel: String, CaseIterable { case lucy_dev_i2v = 
"lucy-dev-i2v" - case lucy_dev_v2v = "lucy-dev-v2v" + case lucy_fast_v2v = "lucy-fast-v2v" case lucy_pro_t2v = "lucy-pro-t2v" case lucy_pro_i2v = "lucy-pro-i2v" case lucy_pro_v2v = "lucy-pro-v2v" @@ -51,6 +52,15 @@ public enum Models { width: 1280, height: 704 ) + case .lucy_v2v_14b_rt: + return ModelDefinition( + name: "lucy_v2v_14b_rt", + urlPath: "/v1/stream", + fps: 15, + width: 1280, + height: 704, + hasReferenceImage: true + ) } } @@ -85,10 +95,10 @@ public enum Models { width: 1280, height: 704 ) - case .lucy_dev_v2v: + case .lucy_fast_v2v: return ModelDefinition( - name: "lucy-dev-v2v", - urlPath: "/v1/generate/lucy-dev-v2v", + name: "lucy-fast-v2v", + urlPath: "/v1/generate/lucy-fast-v2v", fps: 25, width: 1280, height: 704 diff --git a/Sources/DecartSDK/Models/ModelsInputFactory.swift b/Sources/DecartSDK/Models/ModelsInputFactory.swift deleted file mode 100644 index b8fd645..0000000 --- a/Sources/DecartSDK/Models/ModelsInputFactory.swift +++ /dev/null @@ -1,227 +0,0 @@ -import Foundation -import UniformTypeIdentifiers - -public enum ProResolution: String, Codable, Sendable { - case res720p = "720p" - case res480p = "480p" -} - -public enum DevResolution: String, Codable, Sendable { - case res720p = "720p" -} - -public enum FileInputError: Error, LocalizedError { - case missingType - case unsupportedType - - public var errorDescription: String? { - switch self { - case .missingType: - return "Unable to determine the media type. Only image and video files are supported." - case .unsupportedType: - return "Unsupported media type. Only image and video files are supported." 
- } - } -} - -public struct FileInput: Codable, Sendable { - public let data: Data - public let filename: String - - public init(data: Data, filename: String) { - self.data = data - self.filename = FileInput.ensureExtension( - for: filename, - defaultExtension: FileInput.defaultExtension(forFilename: filename) - ) - } - - public static func image(data: Data, filename: String = "image.jpg") -> FileInput { - FileInput( - data: data, - filename: ensureExtension(for: filename, defaultExtension: "jpg") - ) - } - - public static func video(data: Data, filename: String = "video.mp4") -> FileInput { - FileInput( - data: data, - filename: ensureExtension(for: filename, defaultExtension: "mp4") - ) - } - - public static func from(data: Data, uniformType: UTType?) throws -> FileInput { - guard let uniformType else { - throw FileInputError.missingType - } - - if uniformType.conforms(to: .image) { - return image(data: data) - } - - if uniformType.conforms(to: .video) { - return video(data: data) - } - - throw FileInputError.unsupportedType - } - - private static func ensureExtension(for filename: String, defaultExtension: String) -> String { - var trimmed = (filename as NSString).lastPathComponent - if trimmed.isEmpty { - trimmed = "attachment.\(defaultExtension)" - } - - if (trimmed as NSString).pathExtension.isEmpty { - trimmed.append(".\(defaultExtension)") - } - - return trimmed - } - - private static func defaultExtension(forFilename filename: String) -> String { - let pathExtension = (filename as NSString).pathExtension.lowercased() - switch pathExtension { - case "jpg", "jpeg", "png", "heic": - return "jpg" - case "mp4", "mov", "m4v": - return "mp4" - default: - return "bin" - } - } -} - -public struct TextToVideoInput: Codable, Sendable { - public let prompt: String - public let seed: Int? - public let resolution: ProResolution? - public let orientation: String? - - public init( - prompt: String, - seed: Int? = nil, - resolution: ProResolution? 
= .res720p, - orientation: String? = nil - ) { - self.prompt = prompt - self.seed = seed - self.resolution = resolution - self.orientation = orientation - } -} - -public struct TextToImageInput: Codable, Sendable { - public let prompt: String - public let seed: Int? - public let resolution: ProResolution? - public let orientation: String? - - public init( - prompt: String, - seed: Int? = nil, - resolution: ProResolution? = .res720p, - orientation: String? = nil - ) { - self.prompt = prompt - self.seed = seed - self.resolution = resolution - self.orientation = orientation - } -} - -public struct ImageToVideoInput: Codable, Sendable { - public let prompt: String - public let data: FileInput // We need to handle how this is serialized (e.g. multipart or base64) - public let seed: Int? - public let resolution: ProResolution? // Or separate structs for dev/pro if needed, but factory can handle types - - public init( - prompt: String, - data: FileInput, - seed: Int? = nil, - resolution: ProResolution? = .res720p - ) { - self.prompt = prompt - self.data = data - self.seed = seed - self.resolution = resolution - } -} - -public struct ImageToImageInput: Codable, Sendable { - public let prompt: String - public let data: FileInput - public let seed: Int? - public let resolution: ProResolution? - public let enhancePrompt: Bool? - - public init( - prompt: String, - data: FileInput, - seed: Int? = nil, - resolution: ProResolution? = .res720p, - enhancePrompt: Bool? = nil - ) { - self.prompt = prompt - self.data = data - self.seed = seed - self.resolution = resolution - self.enhancePrompt = enhancePrompt - } -} - -public struct VideoToVideoInput: Codable, Sendable { - public let prompt: String - public let data: FileInput - public let seed: Int? - public let resolution: ProResolution? // pro supports 480p/720p, dev supports 720p. - public let enhancePrompt: Bool? - public let numInferenceSteps: Int? - - public init( - prompt: String, - data: FileInput, - seed: Int? 
= nil, - resolution: ProResolution? = .res720p, - enhancePrompt: Bool? = nil, - numInferenceSteps: Int? = nil - ) { - self.prompt = prompt - self.data = data - self.seed = seed - self.resolution = resolution - self.enhancePrompt = enhancePrompt - self.numInferenceSteps = numInferenceSteps - } -} - -public enum ModelInputType: Sendable { - case textToVideo - case textToImage - case imageToVideo - case imageToImage - case videoToVideo -} - -public enum ModelsInputFactory: Sendable { - public static func videoInputType(for model: VideoModel) -> ModelInputType { - switch model { - case .lucy_pro_t2v: - return .textToVideo - case .lucy_dev_i2v, .lucy_pro_i2v: - return .imageToVideo - case .lucy_dev_v2v, .lucy_pro_v2v: - return .videoToVideo - } - } - - public static func imageInputType(for model: ImageModel) -> ModelInputType { - switch model { - case .lucy_pro_t2i: - return .textToImage - case .lucy_pro_i2i: - return .imageToImage - } - } -} diff --git a/Sources/DecartSDK/Process/ProcessClient.swift b/Sources/DecartSDK/Process/ProcessClient.swift index ba60fd5..3c44479 100644 --- a/Sources/DecartSDK/Process/ProcessClient.swift +++ b/Sources/DecartSDK/Process/ProcessClient.swift @@ -1,6 +1,6 @@ import Foundation -public struct ProcessClient { +public struct ProcessClient: Sendable { private let session: URLSession private let request: URLRequest @@ -188,7 +188,7 @@ public struct ProcessClient { // MARK: - Process - public func process() async throws -> Data { + public nonisolated func process() async throws -> Data { let (data, response) = try await session.data(for: request) guard let httpResponse = response as? HTTPURLResponse else { @@ -197,7 +197,7 @@ public struct ProcessClient { guard (200 ... 299).contains(httpResponse.statusCode) else { let errorText = String(data: data, encoding: .utf8) ?? "Unknown error" - DecartLogger.log("error processing request: \(errorText), for route: \(request.url?.absoluteString ?? 
"unknown"), and body: \(String(decoding: request.httpBody ?? Data(), as: UTF8.self))", level: .error) + DecartLogger.log("error processing request: \(errorText), for route: \(request.url?.absoluteString ?? "unknown"), and body:", level: .error) throw DecartError.processingError( "Processing failed: \(httpResponse.statusCode) - \(errorText)") } diff --git a/Sources/DecartSDK/Realtime/DecartRealtimeManager.swift b/Sources/DecartSDK/Realtime/DecartRealtimeManager.swift new file mode 100644 index 0000000..248ca69 --- /dev/null +++ b/Sources/DecartSDK/Realtime/DecartRealtimeManager.swift @@ -0,0 +1,280 @@ +import Foundation +@preconcurrency import WebRTC + +public final class DecartRealtimeManager: @unchecked Sendable { + public let options: RealtimeConfiguration + public let events: AsyncStream + public private(set) var serviceStatus: RealtimeServiceStatus = .unknown { + didSet { + guard oldValue != serviceStatus else { return } + emitStateIfChanged() + } + } + public private(set) var queuePosition: Int? { + didSet { + guard oldValue != queuePosition else { return } + emitStateIfChanged() + } + } + public private(set) var queueSize: Int? { + didSet { + guard oldValue != queueSize else { return } + emitStateIfChanged() + } + } + + private var webRTCClient: WebRTCClient? + private var webSocketClient: WebSocketClient? + + private let signalingServerURL: URL + private let stateContinuation: AsyncStream.Continuation + private var webSocketListenerTask: Task? + private var connectionStateListenerTask: Task? + + private var connectionState: DecartRealtimeConnectionState = .idle { + didSet { + guard oldValue != connectionState else { return } + emitStateIfChanged() + } + } + + private var lastEmittedState: DecartRealtimeState? 
+ private var currentState: DecartRealtimeState { + DecartRealtimeState( + connectionState: connectionState, + serviceStatus: serviceStatus, + queuePosition: queuePosition, + queueSize: queueSize + ) + } + + public init(signalingServerURL: URL, options: RealtimeConfiguration) { + self.signalingServerURL = signalingServerURL + self.options = options + + let (stream, continuation) = AsyncStream.makeStream( + of: DecartRealtimeState.self, + bufferingPolicy: .bufferingNewest(1) + ) + self.events = stream + self.stateContinuation = continuation + emitStateIfChanged() + } + + private func emitStateIfChanged() { + let state = currentState + if lastEmittedState != state { + lastEmittedState = state + stateContinuation.yield(state) + } + } + + deinit { + webSocketListenerTask?.cancel() + connectionStateListenerTask?.cancel() + webRTCClient?.close() + stateContinuation.finish() + DecartLogger.log("RealtimeManager (SDK) deinitialized", level: .info) + } +} + +// MARK: - Public API + +public extension DecartRealtimeManager { + func connect(localStream: RealtimeMediaStream) async throws -> RealtimeMediaStream { + connectionState = .connecting + + let wsClient = await WebSocketClient(url: signalingServerURL) + webSocketClient = wsClient + setupWebSocketListener(wsClient) + + if serviceStatus == .enteringQueue { + try await waitForServiceReady() + } + + let rtcClient = WebRTCClient( + config: options.connection.rtcConfiguration, + constraints: options.media.connectionConstraints, + videoConfig: options.media.video, + sendMessage: { [weak self] in self?.sendMessage($0) }, + withAudio: localStream.audioTrack != nil + ) + webRTCClient = rtcClient + setupConnectionStateListener(rtcClient) + + rtcClient.startLocalStreaming( + videoTrack: localStream.videoTrack, + audioTrack: localStream.audioTrack + ) + + let offer = try await rtcClient.createOffer(constraints: options.media.offerConstraints) + try await rtcClient.setLocalDescription(offer) + sendMessage(.offer(OfferMessage(sdp: 
offer.sdp))) + + try await waitForConnection(timeout: options.connection.connectionTimeout) + setPrompt(options.initialPrompt) + + guard let remoteStream = rtcClient.getRemoteRealtimeStream() else { + throw DecartError.webRTCError("couldn't get remote stream, check video transceiver") + } + + return remoteStream + } + + func disconnect() async { + connectionState = .disconnected + webSocketListenerTask?.cancel() + webSocketListenerTask = nil + connectionStateListenerTask?.cancel() + connectionStateListenerTask = nil + webRTCClient?.close() + webRTCClient = nil + await webSocketClient?.disconnect() + + #if canImport(WebRTC) && os(iOS) + let audioSession = RTCAudioSession.sharedInstance() + if audioSession.isActive { + audioSession.lockForConfiguration() + try? audioSession.setActive(false) + audioSession.unlockForConfiguration() + } + #endif + webSocketClient = nil + } + + func setPrompt(_ prompt: DecartPrompt) { + guard + options.model.hasReferenceImage + else { + // if !options.model.hasReferenceImage { + sendMessage(.prompt(PromptMessage(prompt: prompt.text))) + // } + return + } + + let base64Image = prompt.referenceImageData?.base64EncodedString() + Task { [weak self] in + guard let self else { return } + await self.sendImageWithPrompt( + base64Image, + prompt: prompt.text, + enhance: prompt.enrich + ) + } + } + + func waitForConnection(timeout: TimeInterval) async throws { + let startTime = Date() + while connectionState != .connected { + if connectionState == .error || connectionState == .disconnected { + throw DecartError.webRTCError("Connection failed") + } + if Date().timeIntervalSince(startTime) > timeout { + throw DecartError.webRTCError("Connection timeout") + } + try await Task.sleep(nanoseconds: 1_000_000_000) // 1 second + } + } +} + +// MARK: - Connection + +public extension DecartRealtimeManager { + func createVideoSource() -> RTCVideoSource { + WebRTCClient.createVideoSource() + } + + func replaceVideoTrack(with newTrack: RTCVideoTrack) { + 
webRTCClient?.replaceVideoTrack(with: newTrack) + } + + func createVideoTrack(source: RTCVideoSource, trackId: String) -> RTCVideoTrack { + WebRTCClient.createVideoTrack(source: source, trackId: trackId) + } + + func createAudioSource(constraints: RTCMediaConstraints? = nil) -> RTCAudioSource { + WebRTCClient.createAudioSource(constraints: constraints) + } + + func createAudioTrack(source: RTCAudioSource, trackId: String) -> RTCAudioTrack { + WebRTCClient.createAudioTrack(source: source, trackId: trackId) + } +} + +// MARK: - Listeners + +private extension DecartRealtimeManager { + func setupWebSocketListener(_ wsClient: WebSocketClient) { + webSocketListenerTask?.cancel() + webSocketListenerTask = Task { [weak self] in + do { + for try await message in wsClient.websocketEventStream { + guard !Task.isCancelled, let self else { return } + switch message { + case .status(let status): + self.serviceStatus = RealtimeServiceStatus.fromStatusString(status.status) + case .queuePosition(let queue): + self.queuePosition = queue.queuePosition + self.queueSize = queue.queueSize + case .promptAck, .sessionId: + break + default: + guard let webRTCClient = self.webRTCClient else { break } + try await webRTCClient.handleSignalingMessage(message) + } + } + self?.connectionState = .disconnected + } catch { + self?.connectionState = .error + } + } + } + + func setupConnectionStateListener(_ rtcClient: WebRTCClient) { + connectionStateListenerTask?.cancel() + connectionStateListenerTask = Task { [weak self] in + for await rtcState in rtcClient.connectionStateStream { + guard !Task.isCancelled, let self else { return } + switch rtcState { + case .connected: self.connectionState = .connected + case .failed, .closed, .disconnected: self.connectionState = .disconnected + case .connecting: self.connectionState = .connecting + default: break + } + } + } + } +} + + +// MARK: - Service Status + +private extension DecartRealtimeManager { + func waitForServiceReady() async throws { + while 
serviceStatus == .enteringQueue { + try await Task.sleep(nanoseconds: 3_000_000_000) + } + } +} + +// MARK: - Messaging + +private extension DecartRealtimeManager { + private func sendMessage(_ message: OutgoingWebSocketMessage) { + guard let webSocketClient else { return } + Task { [webSocketClient] in try? await webSocketClient.send(message) } + } + + func sendImageWithPrompt( + _ imageBase64: String?, + prompt: String, + enhance: Bool + ) async { + let message = SetImageMessage( + imageData: imageBase64, + prompt: prompt, + enhancePrompt: enhance + ) + sendMessage(.setImage(message)) + } +} diff --git a/Sources/DecartSDK/Realtime/Models/DecartPrompt.swift b/Sources/DecartSDK/Realtime/Models/DecartPrompt.swift new file mode 100644 index 0000000..0d31766 --- /dev/null +++ b/Sources/DecartSDK/Realtime/Models/DecartPrompt.swift @@ -0,0 +1,14 @@ +import Foundation + +public struct DecartPrompt: Sendable { + public let text: String + public let enrich: Bool + // for lucy 14b we must send a ref image with text prompt + public let referenceImageData: Data? + + public init(text: String, referenceImageData: Data? = nil, enrich: Bool = false) { + self.text = text + self.referenceImageData = referenceImageData + self.enrich = enrich + } +} diff --git a/Sources/DecartSDK/Realtime/Models/DecartRealtimeState.swift b/Sources/DecartSDK/Realtime/Models/DecartRealtimeState.swift new file mode 100644 index 0000000..172ea10 --- /dev/null +++ b/Sources/DecartSDK/Realtime/Models/DecartRealtimeState.swift @@ -0,0 +1,18 @@ +public struct DecartRealtimeState: Sendable, Equatable { + public let connectionState: DecartRealtimeConnectionState + public let serviceStatus: RealtimeServiceStatus + public let queuePosition: Int? + public let queueSize: Int? + + public init( + connectionState: DecartRealtimeConnectionState, + serviceStatus: RealtimeServiceStatus, + queuePosition: Int?, + queueSize: Int? 
+ ) { + self.connectionState = connectionState + self.serviceStatus = serviceStatus + self.queuePosition = queuePosition + self.queueSize = queueSize + } +} diff --git a/Sources/DecartSDK/Realtime/Models/RealtimeConnectionState.swift b/Sources/DecartSDK/Realtime/Models/RealtimeConnectionState.swift new file mode 100644 index 0000000..771a920 --- /dev/null +++ b/Sources/DecartSDK/Realtime/Models/RealtimeConnectionState.swift @@ -0,0 +1,15 @@ +public enum DecartRealtimeConnectionState: String, Sendable { + case connecting = "Connecting" + case connected = "Connected" + case disconnected = "Disconnected" + case idle = "Idle" + case error = "Error" + + public var isConnected: Bool { + self == .connected + } + + public var isInSession: Bool { + self == .connected || self == .connecting + } +} diff --git a/Sources/DecartSDK/Realtime/RealtimeDataTypes.swift b/Sources/DecartSDK/Realtime/Models/RealtimeMediaStream.swift similarity index 52% rename from Sources/DecartSDK/Realtime/RealtimeDataTypes.swift rename to Sources/DecartSDK/Realtime/Models/RealtimeMediaStream.swift index f427358..ccb1004 100644 --- a/Sources/DecartSDK/Realtime/RealtimeDataTypes.swift +++ b/Sources/DecartSDK/Realtime/Models/RealtimeMediaStream.swift @@ -1,9 +1,3 @@ -// -// Realtime.swift -// DecartSDK -// -// Created by Alon Bar-el on 03/11/2025. 
-// @preconcurrency import WebRTC public struct RealtimeMediaStream: Sendable { @@ -15,7 +9,7 @@ public struct RealtimeMediaStream: Sendable { case .localStream: return "stream-local" case .remoteStream: - return "stream-remote" // It's good practice to handle all cases + return "stream-remote" } } } @@ -34,19 +28,3 @@ public struct RealtimeMediaStream: Sendable { self.id = id.id } } - -public enum DecartRealtimeConnectionState: String, Sendable { - case connecting = "Connecting" - case connected = "Connected" - case disconnected = "Disconnected" - case idle = "Idle" - case error = "Error" - - public var isConnected: Bool { - self == .connected - } - - public var isInSession: Bool { - self == .connected || self == .connecting - } -} diff --git a/Sources/DecartSDK/Realtime/Models/RealtimeServiceStatus.swift b/Sources/DecartSDK/Realtime/Models/RealtimeServiceStatus.swift new file mode 100644 index 0000000..b49e0c6 --- /dev/null +++ b/Sources/DecartSDK/Realtime/Models/RealtimeServiceStatus.swift @@ -0,0 +1,16 @@ +public enum RealtimeServiceStatus: String, Sendable { + case unknown + case enteringQueue = "Entering queue" + case ready = "Ready" + + static func fromStatusString(_ status: String) -> RealtimeServiceStatus { + let normalized = status.trimmingCharacters(in: .whitespacesAndNewlines).lowercased() + if normalized.contains("ready") { + return .ready + } + if normalized.contains("entering queue") { + return .enteringQueue + } + return .unknown + } +} diff --git a/Sources/DecartSDK/Realtime/RealtimeClient+Media.swift b/Sources/DecartSDK/Realtime/RealtimeClient+Media.swift deleted file mode 100644 index 8d4c5a9..0000000 --- a/Sources/DecartSDK/Realtime/RealtimeClient+Media.swift +++ /dev/null @@ -1,34 +0,0 @@ -import Foundation -import WebRTC - -extension RealtimeClient { - // MARK: - Media Factory Methods - - public func getTransceivers() -> [RTCRtpTransceiver] { - webRTCManager.peerConnection.transceivers - } - - public func createAudioSource(constraints: 
RTCMediaConstraints? = nil) -> RTCAudioSource { - webRTCManager.factory.audioSource(with: constraints) - } - - public func createAudioTrack(source: RTCAudioSource, trackId: String) -> RTCAudioTrack { - webRTCManager.factory.audioTrack(with: source, trackId: trackId) - } - - public func createVideoSource() -> RTCVideoSource { - webRTCManager.factory.videoSource() - } - - public func createVideoTrack(source: RTCVideoSource, trackId: String) -> RTCVideoTrack { - webRTCManager.factory.videoTrack(with: source, trackId: trackId) - } - - public func createLocalVideoTrack() -> (RTCVideoTrack, RTCCameraVideoCapturer) { - let videoSource = createVideoSource() - let videoTrack = createVideoTrack(source: videoSource, trackId: UUID().uuidString) - let videoCapturer = RTCCameraVideoCapturer(delegate: videoSource) - return (videoTrack, videoCapturer) - } -} - diff --git a/Sources/DecartSDK/Realtime/RealtimeClient.swift b/Sources/DecartSDK/Realtime/RealtimeClient.swift deleted file mode 100644 index 7f421c4..0000000 --- a/Sources/DecartSDK/Realtime/RealtimeClient.swift +++ /dev/null @@ -1,94 +0,0 @@ -import Foundation -import WebRTC - -public final class RealtimeClient { - let webRTCManager: WebRTCManager - private let signalingServerURL: URL - public let options: RealtimeConfiguration - - public let events: AsyncStream - - public init(signalingServerURL: URL, options: RealtimeConfiguration) throws { - self.options = options - self.signalingServerURL = signalingServerURL - - self.webRTCManager = WebRTCManager( - realtimeConfig: options - ) - self.events = webRTCManager.signalingManager.events - } - - public func connect(localStream: RealtimeMediaStream) async throws -> RealtimeMediaStream { - webRTCManager.onWebrtcConnectedCallback = { [weak self] in - guard let self = self else { return } - self.setPrompt(self.options.initialState.prompt) - } - return try await connectWithRetry( - localStream: localStream, - maxRetries: 3, - permanentErrors: ["permission denied", "not allowed", 
"invalid session"] - ) - } - - public func disconnect() async { - await webRTCManager.disconnect() - } - - public func setPrompt(_ prompt: Prompt) { - webRTCManager.sendWebsocketMessage(.prompt(PromptMessage(prompt: prompt.text))) - } - - // MARK: - Private Helpers - - private func connectWithRetry( - localStream: RealtimeMediaStream, - maxRetries: Int, - permanentErrors: [String] - ) async throws -> RealtimeMediaStream { - var retries = 0 - var delay: TimeInterval = 1.0 - - while retries < maxRetries { - do { - try await webRTCManager.connect(url: signalingServerURL, localStream: localStream) - - guard - let remoteVideoTrack = getTransceivers().first(where: { $0.mediaType == .video } - )?.receiver.track as? RTCVideoTrack - else { - throw DecartError.webRTCError("Remote video track not found after connection.") - } - - let remoteAudioTrack = - getTransceivers().first(where: { $0.mediaType == .audio })?.receiver.track - as? RTCAudioTrack - - return RealtimeMediaStream( - videoTrack: remoteVideoTrack, - audioTrack: remoteAudioTrack, - id: .remoteStream - ) - } catch { - let errorMessage = error.localizedDescription.lowercased() - if permanentErrors.contains(where: { errorMessage.contains($0) }) { - DecartLogger.log( - "[RealtimeClient] Permanent error detected, aborting retries.", - level: .error - ) - throw error - } - - retries += 1 - if retries >= maxRetries { - DecartLogger.log("[RealtimeClient] Max retries reached.", level: .error) - throw error - } - - try await Task.sleep(nanoseconds: UInt64(delay * 1_000_000_000)) - delay = min(delay * 2, 10.0) - } - } - - throw DecartError.webRTCError("Connection failed after max retries.") - } -} diff --git a/Sources/DecartSDK/Realtime/RealtimeConfiguration.swift b/Sources/DecartSDK/Realtime/RealtimeConfiguration.swift index 31f21cf..e114585 100644 --- a/Sources/DecartSDK/Realtime/RealtimeConfiguration.swift +++ b/Sources/DecartSDK/Realtime/RealtimeConfiguration.swift @@ -10,18 +10,18 @@ import Foundation public struct 
RealtimeConfiguration: Sendable { public let model: ModelDefinition - public let initialState: ModelState + public let initialPrompt: DecartPrompt public let connection: ConnectionConfig public let media: MediaConfig public init( model: ModelDefinition, - initialState: ModelState, + initialPrompt: DecartPrompt, connection: ConnectionConfig = .init(), media: MediaConfig = .init() ) { self.model = model - self.initialState = initialState + self.initialPrompt = initialPrompt self.connection = connection self.media = media } @@ -30,27 +30,26 @@ public struct RealtimeConfiguration: Sendable { public struct ConnectionConfig: Sendable { public let iceServers: [String] - public let connectionTimeout: Int32 - public let pingInterval: Int32 + public let connectionTimeout: TimeInterval + public let rtcConfiguration: RTCConfiguration public init( iceServers: [String] = ["stun:stun.l.google.com:19302"], - connectionTimeout: Int32 = 7000, - pingInterval: Int32 = 2000 + connectionTimeout: TimeInterval = 15, + rtcConfiguration: RTCConfiguration? 
= nil ) { self.iceServers = iceServers self.connectionTimeout = connectionTimeout - self.pingInterval = pingInterval - } - - public func makeRTCConfiguration() -> RTCConfiguration { - let config = RTCConfiguration() - config.iceServers = [RTCIceServer(urlStrings: iceServers)] - config.sdpSemantics = .unifiedPlan - config.continualGatheringPolicy = .gatherContinually - config.iceConnectionReceivingTimeout = connectionTimeout -// config.iceBackupCandidatePairPingInterval = pingInterval - return config + if let rtcConfiguration { + self.rtcConfiguration = rtcConfiguration + } else { + let config = RTCConfiguration() + config.iceServers = [RTCIceServer(urlStrings: iceServers)] + config.sdpSemantics = .unifiedPlan + config.continualGatheringPolicy = .gatherContinually + config.iceCandidatePoolSize = 10 + self.rtcConfiguration = config + } } } @@ -83,8 +82,8 @@ public struct RealtimeConfiguration: Sendable { public let preferredCodec: String public init( - maxBitrate: Int = 3_800_000, - minBitrate: Int = 100_000, + maxBitrate: Int = 2_500_000, + minBitrate: Int = 300_000, maxFramerate: Int = 26, preferredCodec: String = "VP8" ) { @@ -94,22 +93,32 @@ public struct RealtimeConfiguration: Sendable { self.preferredCodec = preferredCodec } - public func configure(transceiver: RTCRtpTransceiver, factory: RTCPeerConnectionFactory) { + func makeTransceiverInit() -> RTCRtpTransceiverInit { + let transceiverInit = RTCRtpTransceiverInit() + transceiverInit.direction = .sendRecv + + let encoding = RTCRtpEncodingParameters() + encoding.maxBitrateBps = NSNumber(value: maxBitrate) + encoding.minBitrateBps = NSNumber(value: minBitrate) + encoding.maxFramerate = NSNumber(value: maxFramerate) + transceiverInit.sendEncodings = [encoding] + + return transceiverInit + } + + func configureTransceiver(_ transceiver: RTCRtpTransceiver, factory: RTCPeerConnectionFactory) { let supportedCodecs = factory.rtpSenderCapabilities(forKind: "video").codecs + let preferredCodecName = 
preferredCodec.uppercased() var preferredCodecs: [RTCRtpCodecCapability] = [] var otherCodecs: [RTCRtpCodecCapability] = [] var utilityCodecs: [RTCRtpCodecCapability] = [] - let preferredCodecName = preferredCodec.uppercased() - for codec in supportedCodecs { let codecNameUpper = codec.name.uppercased() if codecNameUpper == preferredCodecName { preferredCodecs.append(codec) - } else if codecNameUpper == "RTX" || codecNameUpper == "RED" - || codecNameUpper == "ULPFEC" - { + } else if codecNameUpper == "RTX" || codecNameUpper == "RED" || codecNameUpper == "ULPFEC" { utilityCodecs.append(codec) } else { otherCodecs.append(codec) @@ -117,18 +126,14 @@ public struct RealtimeConfiguration: Sendable { } let sortedCodecs = preferredCodecs + otherCodecs + utilityCodecs - try? transceiver.setCodecPreferences(sortedCodecs, error: ()) - - let sender = transceiver.sender - let parameters = sender.parameters - if parameters.encodings.indices.contains(0) { - let encodingParam = parameters.encodings[0] - encodingParam.maxBitrateBps = NSNumber(value: maxBitrate) - encodingParam.minBitrateBps = NSNumber(value: minBitrate) - encodingParam.maxFramerate = NSNumber(value: maxFramerate) - - parameters.encodings[0] = encodingParam - sender.parameters = parameters + do { + try transceiver.setCodecPreferences(sortedCodecs, error: ()) + } catch { + DecartLogger + .log( + "error while setting codec preferences: \(error)", + level: .error + ) } } } diff --git a/Sources/DecartSDK/Realtime/WebRTC/SignalingModel.swift b/Sources/DecartSDK/Realtime/Transport/WebSocket/SignalingModel.swift similarity index 52% rename from Sources/DecartSDK/Realtime/WebRTC/SignalingModel.swift rename to Sources/DecartSDK/Realtime/Transport/WebSocket/SignalingModel.swift index 60b159f..9b802ba 100644 --- a/Sources/DecartSDK/Realtime/WebRTC/SignalingModel.swift +++ b/Sources/DecartSDK/Realtime/Transport/WebSocket/SignalingModel.swift @@ -43,10 +43,7 @@ struct IceCandidateMessage: Codable, Sendable { init(candidate: 
RTCIceCandidate) { guard let sdpMid = candidate.sdpMid else { - DecartLogger.log("found invalid candidate without sdpMid", level: .warning) - fatalError( - "found invalid candidate without sdpMid. This should never happen." - ) + fatalError("found invalid candidate without sdpMid") } self.type = "ice-candidate" @@ -68,13 +65,65 @@ struct PromptMessage: Codable, Sendable { } } -struct SwitchCameraMessage: Codable, Sendable { +struct SetImageMessage: Codable, Sendable { + let type: String + let prompt: String? + let imageData: String? + let enhancePrompt: Bool? + + init(imageData: String?, prompt: String? = nil, enhancePrompt: Bool? = nil) { + self.type = "set_image" + self.prompt = prompt + self.imageData = imageData + self.enhancePrompt = enhancePrompt + } + + private enum CodingKeys: String, CodingKey { + case type + case prompt + case imageData = "image_data" + case enhancePrompt = "enhance_prompt" + } +} + +struct ServerErrorMessage: Codable, Sendable { + let type: String + let message: String? + let error: String? +} + +struct SessionIdMessage: Codable, Sendable { + let type: String + let sessionId: String? + let session_id: String? + + var id: String? { sessionId ?? session_id } +} + +struct PromptAckMessage: Codable, Sendable { let type: String - let rotateY: Int +} + +struct SetImageAckMessage: Codable, Sendable { + let type: String + let success: Bool + let error: String? +} + +struct StatusMessage: Codable, Sendable { + let type: String + let status: String +} + +struct QueuePositionMessage: Codable, Sendable { + let type: String + let queuePosition: Int? + let queueSize: Int? 
- init(rotateY: Int) { - self.type = "switch_camera" - self.rotateY = rotateY + private enum CodingKeys: String, CodingKey { + case type + case queuePosition = "queue_position" + case queueSize = "queue_size" } } @@ -82,11 +131,16 @@ enum IncomingWebSocketMessage: Codable, Sendable { case offer(OfferMessage) case answer(AnswerMessage) case iceCandidate(IceCandidateMessage) + case error(ServerErrorMessage) + case sessionId(SessionIdMessage) + case promptAck(PromptAckMessage) + case setImageAck(SetImageAckMessage) + case status(StatusMessage) + case queuePosition(QueuePositionMessage) init(from decoder: Decoder) throws { let container = try decoder.container(keyedBy: CodingKeys.self) let type = try container.decode(String.self, forKey: .type) - DecartLogger.log("got incoming message \(type)", level: .info) switch type { case "offer": @@ -95,6 +149,18 @@ enum IncomingWebSocketMessage: Codable, Sendable { self = try .answer(AnswerMessage(from: decoder)) case "ice-candidate": self = try .iceCandidate(IceCandidateMessage(from: decoder)) + case "error": + self = try .error(ServerErrorMessage(from: decoder)) + case "session_id": + self = try .sessionId(SessionIdMessage(from: decoder)) + case "prompt_ack": + self = try .promptAck(PromptAckMessage(from: decoder)) + case "set_image_ack": + self = try .setImageAck(SetImageAckMessage(from: decoder)) + case "status": + self = try .status(StatusMessage(from: decoder)) + case "queue_position": + self = try .queuePosition(QueuePositionMessage(from: decoder)) default: throw DecodingError.dataCorruptedError( forKey: .type, @@ -112,6 +178,18 @@ enum IncomingWebSocketMessage: Codable, Sendable { try msg.encode(to: encoder) case .iceCandidate(let msg): try msg.encode(to: encoder) + case .error(let msg): + try msg.encode(to: encoder) + case .sessionId(let msg): + try msg.encode(to: encoder) + case .promptAck(let msg): + try msg.encode(to: encoder) + case .setImageAck(let msg): + try msg.encode(to: encoder) + case .status(let msg): + try 
msg.encode(to: encoder) + case .queuePosition(let msg): + try msg.encode(to: encoder) } } @@ -125,7 +203,7 @@ enum OutgoingWebSocketMessage: Codable, Sendable { case answer(AnswerMessage) case iceCandidate(IceCandidateMessage) case prompt(PromptMessage) - case switchCamera(SwitchCameraMessage) + case setImage(SetImageMessage) func encode(to encoder: Encoder) throws { switch self { @@ -137,9 +215,8 @@ enum OutgoingWebSocketMessage: Codable, Sendable { try msg.encode(to: encoder) case .prompt(let msg): try msg.encode(to: encoder) - case .switchCamera(let msg): + case .setImage(let msg): try msg.encode(to: encoder) } } } - diff --git a/Sources/DecartSDK/Realtime/Transport/WebSocket/WebSocketClient.swift b/Sources/DecartSDK/Realtime/Transport/WebSocket/WebSocketClient.swift new file mode 100644 index 0000000..59859a4 --- /dev/null +++ b/Sources/DecartSDK/Realtime/Transport/WebSocket/WebSocketClient.swift @@ -0,0 +1,81 @@ +import Foundation +import WebSocket + +actor WebSocketClient { + private var socket: WebSocket? + private var listeningTask: Task? 
+ private let decoder = JSONDecoder() + private let encoder = JSONEncoder() + + private let eventStreamContinuation: AsyncStream.Continuation + nonisolated let websocketEventStream: AsyncStream + + init(url: URL) async { + let (websocketEventStream, eventStreamContinuation) = + AsyncStream.makeStream(of: IncomingWebSocketMessage.self) + self.eventStreamContinuation = eventStreamContinuation + self.websocketEventStream = websocketEventStream + + do { + let newSocket = try await WebSocket.system(url: url) + socket = newSocket + try await newSocket.open() + mountListener(socket: newSocket) + } catch { + socket = nil + eventStreamContinuation.finish() + DecartLogger.log( + "unable to open websocket: \(error)", + level: .error + ) + } + } + + private func mountListener(socket: WebSocket) { + listeningTask?.cancel() + listeningTask = Task { [weak self] in + guard let self else { return } + for await msg in socket.messages { + if Task.isCancelled { return } + await self.handleIncomingMessage(msg) + } + await self.finishStream() + } + } + + private func handleIncomingMessage(_ message: WebSocketMessage) { + guard + let text = message.stringValue, + let data = text.data(using: .utf8) + else { return } + + do { + let message = try decoder.decode(IncomingWebSocketMessage.self, from: data) + eventStreamContinuation.yield(message) + } catch { + DecartLogger.log( + "unable to decode websocket message: \(error)", + level: .warning + ) + } + } + + private func finishStream() { + eventStreamContinuation.finish() + } + + func send(_ message: T) async throws { + let data = try encoder.encode(message) + guard let jsonString = String(data: data, encoding: .utf8) else { return } + guard let socket else { return } + try await socket.send(.text(jsonString)) + } + + func disconnect() async { + listeningTask?.cancel() + listeningTask = nil + eventStreamContinuation.finish() + guard let socket else { return } + try? 
await socket.close() + } +} diff --git a/Sources/DecartSDK/Realtime/WebRTC/RTCPeerConnection+Ext.swift b/Sources/DecartSDK/Realtime/WebRTC/RTCPeerConnection+Ext.swift index 86ee2a7..b42d003 100644 --- a/Sources/DecartSDK/Realtime/WebRTC/RTCPeerConnection+Ext.swift +++ b/Sources/DecartSDK/Realtime/WebRTC/RTCPeerConnection+Ext.swift @@ -7,6 +7,7 @@ @preconcurrency import WebRTC extension RTCSessionDescription: @unchecked @retroactive Sendable {} +extension RTCCameraVideoCapturer: @unchecked @retroactive Sendable {} extension RTCPeerConnection { func offer(for constraints: RTCMediaConstraints) async throws -> RTCSessionDescription? { diff --git a/Sources/DecartSDK/Realtime/WebRTC/SignalingClient.swift b/Sources/DecartSDK/Realtime/WebRTC/SignalingClient.swift new file mode 100644 index 0000000..d0a067b --- /dev/null +++ b/Sources/DecartSDK/Realtime/WebRTC/SignalingClient.swift @@ -0,0 +1,50 @@ +import Foundation +@preconcurrency import WebRTC + +struct SignalingClient { + private let peerConnection: RTCPeerConnection + private let factory: RTCPeerConnectionFactory + private let sendMessage: (OutgoingWebSocketMessage) -> Void + + init( + peerConnection: RTCPeerConnection, + factory: RTCPeerConnectionFactory, + sendMessage: @escaping (OutgoingWebSocketMessage) -> Void + ) { + self.peerConnection = peerConnection + self.factory = factory + self.sendMessage = sendMessage + } + + func handleMessage(_ message: IncomingWebSocketMessage) async throws { + switch message { + case .offer(let msg): + let sdp = RTCSessionDescription(type: .offer, sdp: msg.sdp) + try await peerConnection.setRemoteDescription(sdp) + let constraints = RTCMediaConstraints(mandatoryConstraints: nil, optionalConstraints: nil) + guard let answer = try await peerConnection.answer(for: constraints) else { + throw DecartError.webRTCError("Failed to create answer") + } + try await peerConnection.setLocalDescription(answer) + sendMessage(.answer(AnswerMessage(type: "answer", sdp: answer.sdp))) + + case 
.answer(let msg): + let sdp = RTCSessionDescription(type: .answer, sdp: msg.sdp) + try await peerConnection.setRemoteDescription(sdp) + + case .iceCandidate(let msg): + let candidate = RTCIceCandidate( + sdp: msg.candidate.candidate, + sdpMLineIndex: msg.candidate.sdpMLineIndex, + sdpMid: msg.candidate.sdpMid + ) + try await peerConnection.add(candidate) + + case .error(let msg): + throw DecartError.serverError(msg.message ?? msg.error ?? "Unknown server error") + + case .sessionId, .promptAck, .setImageAck, .status, .queuePosition: + break + } + } +} diff --git a/Sources/DecartSDK/Realtime/WebRTC/SignalingManager.swift b/Sources/DecartSDK/Realtime/WebRTC/SignalingManager.swift deleted file mode 100644 index f5beb58..0000000 --- a/Sources/DecartSDK/Realtime/WebRTC/SignalingManager.swift +++ /dev/null @@ -1,127 +0,0 @@ -import Foundation -@preconcurrency import WebRTC - -/// Manages WebSocket signaling connection with AsyncStream-based message delivery -actor SignalingManager { - private let webSocket: WebSocketClient - private let peerConnection: RTCPeerConnection - private var wsListenerTask: Task? 
- - private var state: DecartRealtimeConnectionState = .idle { - didSet { - guard oldValue != state else { return } - stateContinuation.yield(state) - } - } - - private let stateContinuation: AsyncStream.Continuation - nonisolated let events: AsyncStream - - init(pc: RTCPeerConnection) { - peerConnection = pc - webSocket = WebSocketClient() - let (stream, continuation) = AsyncStream.makeStream( - of: DecartRealtimeConnectionState.self, - bufferingPolicy: .bufferingNewest(1) - ) - events = stream - stateContinuation = continuation - } - - func connect(url: URL, timeout: TimeInterval = 30) async { - state = .connecting - await webSocket.connect(url: url) - let task = Task { - let eventStream = self.webSocket.websocketEventStream - do { - for try await event in eventStream { - if Task.isCancelled { return } - await self.handle(event) - } - } catch { - DecartLogger.log("error in signaling loop: \(error)", level: .error) - self.state = .error - } - } - if wsListenerTask != nil { - wsListenerTask?.cancel() - wsListenerTask = nil - } - - wsListenerTask = task - } - - func updatePeerConnectionState(_ newState: RTCPeerConnectionState) { - switch newState { - case .connected: - state = .connected - case .failed, .closed: - state = .disconnected - case .connecting: - // Keep as connecting if we are already there, or set it if we were idle - if state != .connecting, state != .connected { - state = .connecting - } - case .disconnected: - state = .disconnected - case .new: - break // Initial state, usually - @unknown default: - break - } - } - - func handle(_ message: IncomingWebSocketMessage) async { - do { - switch message { - case .offer(let msg): - let sdp = RTCSessionDescription(type: .offer, sdp: msg.sdp) - try await peerConnection.setRemoteDescription(sdp) - - let constraints = RTCMediaConstraints( - mandatoryConstraints: nil, - optionalConstraints: nil - ) - - guard let answer = try? 
await peerConnection.answer(for: constraints) else { - DecartLogger.log("[WebRTCConnection] Failed to create answer", level: .error) - throw DecartError.webRTCError("failed to create answer, check logs") - } - - try await peerConnection.setLocalDescription(answer) - await send(.answer(AnswerMessage(type: "answer", sdp: answer.sdp))) - - case .answer(let msg): - let sdp = RTCSessionDescription(type: .answer, sdp: msg.sdp) - try await peerConnection.setRemoteDescription(sdp) - - case .iceCandidate(let msg): - let candidate = RTCIceCandidate( - sdp: msg.candidate.candidate, - sdpMLineIndex: msg.candidate.sdpMLineIndex, - sdpMid: msg.candidate.sdpMid - ) - try await peerConnection.add(candidate) - } - } catch { - DecartLogger.log("error while handling websocket message: \(error)", level: .error) - } - } - - nonisolated func send(_ message: OutgoingWebSocketMessage) { - Task { - do { - try await webSocket.send(message) - } catch { - DecartLogger.log("error while sending websocket message: \(error)", level: .error) - } - } - } - - func disconnect() async { - state = .disconnected - await webSocket.disconnect() - wsListenerTask?.cancel() - wsListenerTask = nil - } -} diff --git a/Sources/DecartSDK/Realtime/WebRTC/WebRTCClient.swift b/Sources/DecartSDK/Realtime/WebRTC/WebRTCClient.swift new file mode 100644 index 0000000..1565ae9 --- /dev/null +++ b/Sources/DecartSDK/Realtime/WebRTC/WebRTCClient.swift @@ -0,0 +1,194 @@ +import Foundation +@preconcurrency import WebRTC + +final class WebRTCClient: @unchecked Sendable { + private nonisolated(unsafe) static var sharedFactory: RTCPeerConnectionFactory? 
+ private static let factoryLock = NSLock() + + nonisolated(unsafe) let factory: RTCPeerConnectionFactory + let peerConnection: RTCPeerConnection + let connectionStateStream: AsyncStream + + private let delegateHandler: WebRTCDelegateHandler + private let signalingClient: SignalingClient + private let connectionStateContinuation: AsyncStream.Continuation + + nonisolated(unsafe) var videoTransceiver: RTCRtpTransceiver? + nonisolated(unsafe) var audioTransceiver: RTCRtpTransceiver? + + private static func getOrCreateFactory() -> RTCPeerConnectionFactory { + factoryLock.lock() + defer { factoryLock.unlock() } + + if let factory = sharedFactory { + return factory + } + + RTCInitializeSSL() + + let factory = RTCPeerConnectionFactory( + encoderFactory: RTCDefaultVideoEncoderFactory(), + decoderFactory: RTCDefaultVideoDecoderFactory() + ) + sharedFactory = factory + return factory + } + + init( + config: RTCConfiguration, + constraints: RTCMediaConstraints, + videoConfig: RealtimeConfiguration.VideoConfig, + sendMessage: @escaping (OutgoingWebSocketMessage) -> Void, + withAudio: Bool + ) { + self.factory = Self.getOrCreateFactory() + + let (stream, continuation) = AsyncStream.makeStream( + of: RTCPeerConnectionState.self, + bufferingPolicy: .bufferingNewest(1) + ) + self.connectionStateStream = stream + self.connectionStateContinuation = continuation + + self.delegateHandler = WebRTCDelegateHandler( + sendMessage: sendMessage, + connectionStateContinuation: continuation + ) + + self.peerConnection = factory.peerConnection( + with: config, + constraints: constraints, + delegate: delegateHandler + )! 
+ + self.signalingClient = SignalingClient( + peerConnection: peerConnection, + factory: factory, + sendMessage: sendMessage + ) + + prepareTransceivers(videoConfig: videoConfig, withAudio: withAudio) + } + + func handleSignalingMessage(_ message: IncomingWebSocketMessage) async throws { + try await signalingClient.handleMessage(message) + } + + deinit { + DecartLogger.log("Webrtc client deinitialized", level: .info) + close() + } +} + +// MARK: - Track Operations + +extension WebRTCClient { + func prepareTransceivers(videoConfig: RealtimeConfiguration.VideoConfig, withAudio: Bool) { + if withAudio { + let audioInit = RTCRtpTransceiverInit() + audioInit.direction = .sendRecv + audioTransceiver = peerConnection.addTransceiver(of: .audio, init: audioInit) + } + + videoTransceiver = peerConnection.addTransceiver(of: .video, init: videoConfig.makeTransceiverInit()) + if let videoTransceiver { + videoConfig.configureTransceiver(videoTransceiver, factory: factory) + } + } + + nonisolated func replaceVideoTrack(with newTrack: RTCVideoTrack) { + guard let videoTransceiver else { + fatalError("Video track does not exist") + } + videoTransceiver.sender.track = newTrack + } + + nonisolated static func createVideoSource() -> RTCVideoSource { + WebRTCClient.getOrCreateFactory().videoSource() + } + + nonisolated static func createVideoTrack(source: RTCVideoSource, trackId: String) -> RTCVideoTrack { + WebRTCClient.getOrCreateFactory().videoTrack(with: source, trackId: trackId) + } + + nonisolated static func createAudioSource(constraints: RTCMediaConstraints? = nil) -> RTCAudioSource { + WebRTCClient.getOrCreateFactory().audioSource(with: constraints) + } + + nonisolated static func createAudioTrack(source: RTCAudioSource, trackId: String) -> RTCAudioTrack { + WebRTCClient.getOrCreateFactory().audioTrack(with: source, trackId: trackId) + } +} + +// MARK: - Streaming + +extension WebRTCClient { + nonisolated func getRemoteRealtimeStream() -> RealtimeMediaStream? 
{ + guard let remoteVideoTrack = videoTransceiver?.receiver.track as? RTCVideoTrack else { + return nil + } + + let remoteAudioTrack = audioTransceiver?.receiver.track as? RTCAudioTrack + + return RealtimeMediaStream( + videoTrack: remoteVideoTrack, + audioTrack: remoteAudioTrack, + id: .remoteStream + ) + } + + @discardableResult + nonisolated func startLocalStreaming(videoTrack: RTCVideoTrack, audioTrack: RTCAudioTrack? = nil) -> RealtimeMediaStream { + if let videoSender = videoTransceiver?.sender { + videoSender.track = videoTrack + } + + if let audioSender = audioTransceiver?.sender { + audioSender.track = audioTrack + } + + return RealtimeMediaStream( + videoTrack: videoTrack, + audioTrack: audioTrack, + id: .localStream + ) + } +} + +// MARK: - SDP Operations + +extension WebRTCClient { + func createOffer(constraints: RTCMediaConstraints) async throws -> RTCSessionDescription { + guard let offer = try await peerConnection.offer(for: constraints) else { + throw DecartError.webRTCError("failed to create offer") + } + return offer + } + + func setLocalDescription(_ sdp: RTCSessionDescription) async throws { + try await peerConnection.setLocalDescription(sdp) + } +} + +// MARK: - ICE Operations + +extension WebRTCClient { + func addIceCandidate(_ candidate: RTCIceCandidate) async throws { + try await peerConnection.add(candidate) + } +} + +// MARK: - Cleanup + +extension WebRTCClient { + func close() { + videoTransceiver?.sender.track = nil + audioTransceiver?.sender.track = nil + delegateHandler.cleanup() + connectionStateContinuation.finish() + peerConnection.close() + peerConnection.delegate = nil + videoTransceiver = nil + audioTransceiver = nil + } +} diff --git a/Sources/DecartSDK/Realtime/WebRTC/WebRTCDelegateHandler.swift b/Sources/DecartSDK/Realtime/WebRTC/WebRTCDelegateHandler.swift new file mode 100644 index 0000000..fb8d531 --- /dev/null +++ b/Sources/DecartSDK/Realtime/WebRTC/WebRTCDelegateHandler.swift @@ -0,0 +1,62 @@ +import Foundation 
+@preconcurrency import WebRTC + +final class WebRTCDelegateHandler: NSObject { + private let sendMessage: (OutgoingWebSocketMessage) -> Void + private let connectionStateContinuation: AsyncStream.Continuation + + init( + sendMessage: @escaping (OutgoingWebSocketMessage) -> Void, + connectionStateContinuation: AsyncStream.Continuation + ) { + self.sendMessage = sendMessage + self.connectionStateContinuation = connectionStateContinuation + } + + func cleanup() { + connectionStateContinuation.finish() + } + + deinit { + DecartLogger.log("WebRTCDelegateHandler deinitialized", level: .info) + cleanup() + } +} + +extension WebRTCDelegateHandler: RTCPeerConnectionDelegate { + func peerConnection( + _ peerConnection: RTCPeerConnection, didChange stateChanged: RTCSignalingState + ) {} + + func peerConnection(_ peerConnection: RTCPeerConnection, didAdd stream: RTCMediaStream) {} + + func peerConnection(_ peerConnection: RTCPeerConnection, didRemove stream: RTCMediaStream) {} + + func peerConnectionShouldNegotiate(_ peerConnection: RTCPeerConnection) {} + + func peerConnection( + _ peerConnection: RTCPeerConnection, didChange newState: RTCIceConnectionState + ) {} + + func peerConnection( + _ peerConnection: RTCPeerConnection, didChange newState: RTCIceGatheringState + ) {} + + func peerConnection(_ peerConnection: RTCPeerConnection, didGenerate candidate: RTCIceCandidate) { + sendMessage(.iceCandidate(IceCandidateMessage(candidate: candidate))) + } + + func peerConnection( + _ peerConnection: RTCPeerConnection, didRemove candidates: [RTCIceCandidate] + ) {} + + func peerConnection(_ peerConnection: RTCPeerConnection, didOpen dataChannel: RTCDataChannel) {} + + func peerConnection( + _ peerConnection: RTCPeerConnection, didChange newState: RTCPeerConnectionState + ) { + connectionStateContinuation.yield(newState) + } +} + +extension WebRTCDelegateHandler: @unchecked Sendable {} diff --git a/Sources/DecartSDK/Realtime/WebRTC/WebRTCManager.swift 
b/Sources/DecartSDK/Realtime/WebRTC/WebRTCManager.swift deleted file mode 100644 index 4a31b10..0000000 --- a/Sources/DecartSDK/Realtime/WebRTC/WebRTCManager.swift +++ /dev/null @@ -1,138 +0,0 @@ -import Foundation -@preconcurrency import WebRTC - -final class WebRTCManager: NSObject { - let factory: RTCPeerConnectionFactory - - @objc let peerConnection: RTCPeerConnection - - let signalingManager: SignalingManager - private let realtimeConfig: RealtimeConfiguration - var onWebrtcConnectedCallback: (() -> Void)? - - init( - realtimeConfig: RealtimeConfiguration - ) { - #if IS_DEVELOPMENT - RTCSetMinDebugLogLevel(.verbose) - #endif - RTCInitializeSSL() - let videoEncoderFactory = RTCDefaultVideoEncoderFactory() - let videoDecoderFactory = RTCDefaultVideoDecoderFactory() - self.factory = RTCPeerConnectionFactory( - encoderFactory: videoEncoderFactory, decoderFactory: videoDecoderFactory) - - let config = realtimeConfig.connection.makeRTCConfiguration() - let constraints = realtimeConfig.media.connectionConstraints - - self.peerConnection = factory.peerConnection( - with: config, - constraints: constraints, - delegate: nil)! 
- self.signalingManager = SignalingManager(pc: peerConnection) - self.realtimeConfig = realtimeConfig - super.init() - peerConnection.delegate = self - } - - func connect(url: URL, localStream: RealtimeMediaStream, timeout: TimeInterval = 30) - async throws - { - do { - peerConnection.add(localStream.videoTrack, streamIds: [localStream.id]) - if let audioTrack = localStream.audioTrack { - peerConnection.add(audioTrack, streamIds: [localStream.id]) - } - - if let transceiver = peerConnection.transceivers.first(where: { $0.mediaType == .video } - ) { - await realtimeConfig.media.video.configure( - transceiver: transceiver, factory: factory) - } - - await signalingManager.connect(url: url) - try await sendOffer() - } catch { - DecartLogger.log("failed to create webrtc connection", level: .error) - await cleanup() - throw error - } - } - - func disconnect() async { - await cleanup() - } - - func sendWebsocketMessage(_ message: OutgoingWebSocketMessage) { - signalingManager.send(message) - } - - private func cleanup() async { - peerConnection.close() - peerConnection.delegate = nil - await signalingManager.disconnect() - } - - private func handleConnectionStateChange(_ rtcState: RTCPeerConnectionState) { - DecartLogger.log("got new state: \(rtcState)", level: .info) - Task { - await signalingManager.updatePeerConnectionState(rtcState) - } - } - - private func sendOffer() async throws { - let constraints = realtimeConfig.media.offerConstraints - guard let offer = try? 
await peerConnection.offer(for: constraints) else { - throw DecartError.webRTCError("failed to create offer, aborting") - } - - try await peerConnection.setLocalDescription(offer) - signalingManager.send(.offer(OfferMessage(sdp: offer.sdp))) - } - - deinit { - DecartLogger.log("WebRTCManager deinit", level: .info) - } -} - -extension WebRTCManager: RTCPeerConnectionDelegate, @unchecked Sendable { - func peerConnection( - _ peerConnection: RTCPeerConnection, didChange stateChanged: RTCSignalingState - ) {} - - func peerConnection(_ peerConnection: RTCPeerConnection, didAdd stream: RTCMediaStream) {} - - func peerConnection(_ peerConnection: RTCPeerConnection, didRemove stream: RTCMediaStream) {} - - func peerConnectionShouldNegotiate(_ peerConnection: RTCPeerConnection) {} - - func peerConnection( - _ peerConnection: RTCPeerConnection, didChange newState: RTCIceConnectionState - ) {} - - func peerConnection( - _ peerConnection: RTCPeerConnection, didChange newState: RTCIceGatheringState - ) {} - - func peerConnection(_ peerConnection: RTCPeerConnection, didGenerate candidate: RTCIceCandidate) { - signalingManager.send( - OutgoingWebSocketMessage.iceCandidate( - .init(candidate: candidate))) - } - - func peerConnection( - _ peerConnection: RTCPeerConnection, didRemove candidates: [RTCIceCandidate] - ) {} - - func peerConnection(_ peerConnection: RTCPeerConnection, didOpen dataChannel: RTCDataChannel) {} - - func peerConnection( - _ peerConnection: RTCPeerConnection, didChange newState: RTCPeerConnectionState - ) { - if newState == .connected { - onWebrtcConnectedCallback?() - } - - handleConnectionStateChange(newState) - } -} diff --git a/Sources/DecartSDK/Realtime/Websocket/SocketStream.swift b/Sources/DecartSDK/Realtime/Websocket/SocketStream.swift deleted file mode 100644 index 40cddb4..0000000 --- a/Sources/DecartSDK/Realtime/Websocket/SocketStream.swift +++ /dev/null @@ -1,83 +0,0 @@ -// -// SocketStream.swift -// DecartSDK -// -// Created by Alon Bar-el on 
03/11/2025. -// -import Foundation - -typealias WebSocketStream = AsyncThrowingStream - -extension URLSessionWebSocketTask { - var stream: WebSocketStream { - return WebSocketStream { continuation in - Task { - var isAlive = true - while isAlive && closeCode == .invalid { - do { - let value = try await receive() - continuation.yield(value) - } catch { - continuation.finish(throwing: error) - isAlive = false - } - } - } - } - } -} - -final class SocketStream: AsyncSequence, @unchecked Sendable { - typealias AsyncIterator = WebSocketStream.Iterator - typealias Element = URLSessionWebSocketTask.Message - - private var continuation: WebSocketStream.Continuation? - private let task: URLSessionWebSocketTask - - private lazy var stream: WebSocketStream = WebSocketStream { continuation in - self.continuation = continuation - waitForNextValue() - } - - init(task: URLSessionWebSocketTask) { - self.task = task - task.resume() - } - - private func waitForNextValue() { - guard task.closeCode == .invalid else { - continuation?.finish() - return - } - task.receive(completionHandler: { [weak self] result in - guard let continuation = self?.continuation else { - return - } - do { - let message = try result.get() - continuation.yield(message) - self?.waitForNextValue() - } catch { - continuation.finish(throwing: error) - } - }) - } - - deinit { - cancel() - continuation?.finish() - } - - func sendMessage(_ message: URLSessionWebSocketTask.Message) async throws { - try await task.send(message) - } - - func makeAsyncIterator() -> AsyncIterator { - return stream.makeAsyncIterator() - } - - func cancel() { - task.cancel(with: .goingAway, reason: nil) - continuation?.finish() - } -} diff --git a/Sources/DecartSDK/Realtime/Websocket/WebSocketClient.swift b/Sources/DecartSDK/Realtime/Websocket/WebSocketClient.swift deleted file mode 100644 index 32417b9..0000000 --- a/Sources/DecartSDK/Realtime/Websocket/WebSocketClient.swift +++ /dev/null @@ -1,97 +0,0 @@ -// -// WebSocketClient.swift -// 
DecartSDK -// -// Created by Alon Bar-el on 03/11/2025. -// - -import Foundation -import Observation - -actor WebSocketClient { - var isConnected: Bool = false - var socketError: DecartError? - - private var stream: SocketStream? - private var listeningTask: Task? - private let decoder = JSONDecoder() - private let encoder = JSONEncoder() - - private var eventStreamContinuation: AsyncStream.Continuation - let websocketEventStream: AsyncStream - - init() { - let (websocketEventStream, eventStreamContinuation) = AsyncStream.makeStream(of: IncomingWebSocketMessage.self) - self.eventStreamContinuation = eventStreamContinuation - self.websocketEventStream = websocketEventStream - } - - func connect(url: URL) { - if stream != nil { return } - let socketConnection = URLSession.shared.webSocketTask(with: url) - stream = SocketStream(task: socketConnection) - listeningTask = Task { [weak self] in - guard let self = self, let stream = await self.stream else { - return - } - do { - for try await msg in stream { - switch msg { - case .string(let text): - await self.handleIncomingMessage(text) - case .data(let d): - if let text = String(data: d, encoding: .utf8) { - await self.handleIncomingMessage(text) - } - @unknown default: break - } - } - } catch { - DecartLogger - .log("error in ws listening loop: \(error)", level: .error) - await self.eventStreamContinuation.finish() - } - } - } - - private func handleIncomingMessage(_ text: String) async { - guard let data = text.data(using: .utf8) else { return } - - do { - let message = try decoder.decode(IncomingWebSocketMessage.self, from: data) - eventStreamContinuation.yield(message) - } catch { - DecartLogger - .log("error while handling incoming message: \(error)", level: .error) - eventStreamContinuation.finish() - } - } - - func send(_ message: T) throws { - guard let stream = stream else { - DecartLogger.log("tried to send ws message when its closed", level: .warning) - return - } - - let data = try encoder.encode(message) - 
guard let jsonString = String(data: data, encoding: .utf8) else { - DecartLogger.log("unable to encode message", level: .warning) - throw DecartError.websocketError("unable to encode message") - } - Task { [stream] in - try await stream.sendMessage(.string(jsonString)) - } - } - - func disconnect() async { - DecartLogger.log("disconnecting from websocket", level: .info) - eventStreamContinuation.finish() - listeningTask?.cancel() - listeningTask = nil - stream?.cancel() - stream = nil - isConnected = false - } - - deinit { DecartLogger.log("Websocket Client deinit", level: .info) } -} diff --git a/Sources/DecartSDK/Shared/DecartError.swift b/Sources/DecartSDK/Shared/DecartError.swift index a8ec5f3..b95bc3f 100644 --- a/Sources/DecartSDK/Shared/DecartError.swift +++ b/Sources/DecartSDK/Shared/DecartError.swift @@ -1,6 +1,6 @@ import Foundation -public enum DecartError: Error { +public enum DecartError: LocalizedError { case invalidAPIKey case invalidBaseURL(String?) case webRTCError(String) @@ -11,6 +11,7 @@ public enum DecartError: Error { case connectionTimeout case websocketError(String) case networkError(Error) + case serverError(String) public var errorDescription: String? 
{ switch self { @@ -37,6 +38,8 @@ public enum DecartError: Error { return "WebSocket error: \(message)" case .networkError(let error): return "Network error: \(error.localizedDescription)" + case .serverError(let message): + return "Server error: \(message)" } } @@ -62,6 +65,8 @@ public enum DecartError: Error { return "WEBSOCKET_ERROR" case .networkError: return "NETWORK_ERROR" + case .serverError: + return "SERVER_ERROR" } } } diff --git a/Sources/DecartSDK/Shared/Logger.swift b/Sources/DecartSDK/Shared/Logger.swift index d580e1e..7bcc2a4 100644 --- a/Sources/DecartSDK/Shared/Logger.swift +++ b/Sources/DecartSDK/Shared/Logger.swift @@ -7,8 +7,7 @@ import Foundation public enum DecartLogger: Sendable { - public static let printImportantOnly: Bool = ProcessInfo.processInfo.environment["printImportantOnly"] == "YES" - + public static let printDebugLogs: Bool = ProcessInfo.processInfo.environment["ENABLE_DECART_SDK_DEBUG_LOGS"] == "YES" public enum Level: String, Sendable { case info = "ℹ️" case warning = "⚠️" @@ -30,8 +29,8 @@ public enum DecartLogger: Sendable { public static func log(_ string: String, level: Level, logBreadcrumbEnabled: Bool = true) { let logString = "[DecartSDK -\(dateFormatter.string(from: Date.now)) \(level.rawValue)] - \(string)" - if DecartLogger.printImportantOnly { - if level == .important { + if !DecartLogger.printDebugLogs { + if level == .important || level == .error { print(logString) } } else { diff --git a/Sources/DecartSDK/SwiftUI/RTCMLVideoViewWrapper.swift b/Sources/DecartSDK/SwiftUI/RTCMLVideoViewWrapper.swift deleted file mode 100644 index adcba51..0000000 --- a/Sources/DecartSDK/SwiftUI/RTCMLVideoViewWrapper.swift +++ /dev/null @@ -1,72 +0,0 @@ -// -// RTCMLVideoViewWrapper.swift -// DecartSDK -// -// Created by Alon Bar-el on 04/11/2025. -// -import SwiftUI -import WebRTC - -#if os(iOS) -/// A SwiftUI View that renders a WebRTC video track. 
-public struct RTCMLVideoViewWrapper: UIViewRepresentable { - public weak var track: RTCVideoTrack? - public var mirror: Bool - - /// Creates a new video view for the given track. - public init(track: RTCVideoTrack?, mirror: Bool = false) { - self.track = track - self.mirror = mirror - } - - public final class Coordinator { - weak var view: RTCMTLVideoView? - weak var lastTrack: RTCVideoTrack? - var lastMirror: Bool = false - - // Add a public init for the coordinator - public init() {} - } - - public func makeCoordinator() -> Coordinator { - Coordinator() - } - - public func makeUIView(context: Context) -> RTCMTLVideoView { - let view = RTCMTLVideoView() - view.videoContentMode = .scaleAspectFill - view.transform = mirror ? CGAffineTransform(scaleX: -1, y: 1) : .identity - context.coordinator.view = view - context.coordinator.lastMirror = mirror - - if let track { - track.add(view) - context.coordinator.lastTrack = track - } - return view - } - - public func updateUIView(_ uiView: RTCMTLVideoView, context: Context) { - // If the track changed, rewire attachment - if context.coordinator.lastTrack !== track { - context.coordinator.lastTrack?.remove(uiView) - if let track { - track.add(uiView) - } - context.coordinator.lastTrack = track - } - - if context.coordinator.lastMirror != mirror { - uiView.transform = mirror ? 
CGAffineTransform(scaleX: -1, y: 1) : .identity - context.coordinator.lastMirror = mirror - } - } - - public static func dismantleUIView(_ uiView: RTCMTLVideoView, coordinator: Coordinator) { - print("dismissing video view and nilling track!") - coordinator.lastTrack?.remove(uiView) - coordinator.view = nil - coordinator.lastTrack = nil - } -} -#endif diff --git a/Sources/DecartSDK/SwiftUI/Realtime/RTCMLVideoViewWrapper.swift b/Sources/DecartSDK/SwiftUI/Realtime/RTCMLVideoViewWrapper.swift new file mode 100644 index 0000000..33e93b7 --- /dev/null +++ b/Sources/DecartSDK/SwiftUI/Realtime/RTCMLVideoViewWrapper.swift @@ -0,0 +1,140 @@ +// +// RTCMLVideoViewWrapper.swift +// DecartSDK +// +// Created by Alon Bar-el on 04/11/2025. +// +import SwiftUI +import WebRTC +#if os(macOS) +import AppKit +import QuartzCore +#endif + +#if os(iOS) +/// A SwiftUI View that renders a WebRTC video track. +public struct RTCMLVideoViewWrapper: UIViewRepresentable { + public weak var track: RTCVideoTrack? + public var mirror: Bool + + /// Creates a new video view for the given track. + public init(track: RTCVideoTrack?, mirror: Bool = false) { + self.track = track + self.mirror = mirror + } + + public final class Coordinator { + weak var view: RTCMTLVideoView? + weak var lastTrack: RTCVideoTrack? + var lastMirror: Bool = false + + // Add a public init for the coordinator + public init() {} + } + + public func makeCoordinator() -> Coordinator { + Coordinator() + } + + public func makeUIView(context: Context) -> RTCMTLVideoView { + let view = RTCMTLVideoView() + view.videoContentMode = .scaleAspectFill + view.transform = mirror ? 
CGAffineTransform(scaleX: -1, y: 1) : .identity + context.coordinator.view = view + context.coordinator.lastMirror = mirror + + if let track { + track.add(view) + context.coordinator.lastTrack = track + } + return view + } + + public func updateUIView(_ uiView: RTCMTLVideoView, context: Context) { + // If the track changed, rewire attachment + if context.coordinator.lastTrack !== track { + context.coordinator.lastTrack?.remove(uiView) + if let track { + track.add(uiView) + } + context.coordinator.lastTrack = track + } + + if context.coordinator.lastMirror != mirror { + uiView.transform = mirror ? CGAffineTransform(scaleX: -1, y: 1) : .identity + context.coordinator.lastMirror = mirror + } + } + + public static func dismantleUIView(_ uiView: RTCMTLVideoView, coordinator: Coordinator) { + coordinator.lastTrack?.remove(uiView) + coordinator.view = nil + coordinator.lastTrack = nil + } +} +#elseif os(macOS) +/// A SwiftUI View that renders a WebRTC video track. +public struct RTCMLVideoViewWrapper: NSViewRepresentable { + public weak var track: RTCVideoTrack? + public var mirror: Bool + + /// Creates a new video view for the given track. + public init(track: RTCVideoTrack?, mirror: Bool = false) { + self.track = track + self.mirror = mirror + } + + public final class Coordinator { + weak var view: RTCMTLNSVideoView? + weak var lastTrack: RTCVideoTrack? 
+ var lastMirror: Bool = false + + public init() {} + } + + public func makeCoordinator() -> Coordinator { + Coordinator() + } + + public func makeNSView(context: Context) -> RTCMTLNSVideoView { + let view = RTCMTLNSVideoView(frame: .zero) + applyMirrorIfPossible(view, mirror: mirror) + context.coordinator.view = view + context.coordinator.lastMirror = mirror + + if let track { + track.add(view) + context.coordinator.lastTrack = track + } + return view + } + + public func updateNSView(_ nsView: RTCMTLNSVideoView, context: Context) { + // If the track changed, rewire attachment + if context.coordinator.lastTrack !== track { + context.coordinator.lastTrack?.remove(nsView) + if let track { + track.add(nsView) + } + context.coordinator.lastTrack = track + } + + if context.coordinator.lastMirror != mirror { + applyMirrorIfPossible(nsView, mirror: mirror) + context.coordinator.lastMirror = mirror + } + } + + public static func dismantleNSView(_ nsView: RTCMTLNSVideoView, coordinator: Coordinator) { + coordinator.lastTrack?.remove(nsView) + coordinator.view = nil + coordinator.lastTrack = nil + } + + private func applyMirrorIfPossible(_ view: RTCMTLNSVideoView, mirror: Bool) { + view.wantsLayer = true + guard let layer = view.layer else { return } + layer.transform = mirror ? CATransform3DMakeScale(-1, 1, 1) : CATransform3DIdentity + } +} +#endif