-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmain.py
60 lines (52 loc) · 2.11 KB
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
from langchain.prompts import FewShotPromptTemplate, PromptTemplate
from langchain.chat_models import ChatOpenAI
from langchain.pydantic_v1 import BaseModel
from langchain_experimental.tabular_synthetic_data.base import SyntheticDataGenerator
from langchain_experimental.tabular_synthetic_data.openai import create_openai_data_generator, OPENAI_TEMPLATE
from langchain_experimental.tabular_synthetic_data.prompts import SYNTHETIC_FEW_SHOT_SUFFIX, SYNTHETIC_FEW_SHOT_PREFIX
from dotenv import load_dotenv, find_dotenv
load_dotenv(find_dotenv()) # read local .env file
examples = [
{
"example": """Patient ID: 123456, Patient Name: John Doe, Diagnosis Code:
J20.9, Procedure Code: 99203, Total Charge: $500, Insurance Claim Amount: $350"""
},
{
"example": """Patient ID: 789012, Patient Name: Johnson Smith, Diagnosis
Code: M54.5, Procedure Code: 99213, Total Charge: $150, Insurance Claim Amount: $120"""
},
{
"example": """Patient ID: 345678, Patient Name: Emily Stone, Diagnosis Code:
E11.9, Procedure Code: 99214, Total Charge: $300, Insurance Claim Amount: $250"""
},
]
class MedicalBilling(BaseModel):
patient_id: int
patient_name: str
diagnosis_code: str
procedure_code: str
total_charge: float
insurance_claim_amount: float
OPENAI_TEMPLATE = PromptTemplate(input_variables=["example"], template="{example}")
prompt_template = FewShotPromptTemplate(
prefix=SYNTHETIC_FEW_SHOT_PREFIX,
examples=examples,
suffix=SYNTHETIC_FEW_SHOT_SUFFIX,
input_variables=["subject", "extra"],
example_prompt=OPENAI_TEMPLATE,
)
synthetic_data_generator = create_openai_data_generator(
output_schema=MedicalBilling,
llm=ChatOpenAI(
temperature=1
), # You'll need to replace with your actual Language Model instance
prompt=prompt_template,
)
if __name__ == '__main__':
synthetic_results = synthetic_data_generator.generate(
subject="medical_billing",
extra="the name must be chosen at random. Make it something you wouldn't normally choose.",
runs=10
)
for i in synthetic_results:
print(i)