# Skip to content

# Metadata correction for C82-2053 #11

# Metadata correction for C82-2053

# Metadata correction for C82-2053 #11

# Validate Correction
#
# Runs when an issue labelled both 'correction' and 'metadata' is opened or
# edited. It extracts the proposed metadata (a fenced JSON block in the issue
# body), asks an OpenAI chat model to judge the proposed title/author list,
# and posts (or updates) a single verdict comment on the issue.
#
# Security note: everything derived from the issue body or from the model's
# reply is untrusted text. Such values are handed to the scripts through
# `env` and read via `process.env`; they are never spliced into the script
# source with workflow expressions, which would allow script injection.
name: Validate Correction
on:
  issues:
    types: [opened, edited]
# The job only writes issue comments, so that is the only scope granted.
permissions:
  issues: write
jobs:
  validate:
    if: contains(github.event.issue.labels.*.name, 'correction') && contains(github.event.issue.labels.*.name, 'metadata')
    runs-on: ubuntu-latest
    steps:
      # axios is required by the "Validate with OpenAI" step below.
      - name: Install dependencies
        run: |
          npm install axios
      # Outputs: anthology_id, author_list (comma-separated), title.
      # Fails the step when the body has no usable JSON block.
      - name: Extract JSON and get anthology ID
        id: extract-data
        uses: actions/github-script@v7
        with:
          script: |
            // The body can be null for an empty issue; normalise CRLF so the
            // fence regex below only has to deal with '\n'.
            const body = (context.payload.issue.body || '').replace(/\r\n/g, '\n');
            const jsonMatch = body.match(/```json\n([\s\S]*?)\n```/);
            if (!jsonMatch) {
              core.setFailed('No JSON found in issue body');
              return;
            }
            let metadata;
            try {
              metadata = JSON.parse(jsonMatch[1]);
            } catch (error) {
              core.setFailed(`Invalid JSON in issue body: ${error.message}`);
              return;
            }
            if (!metadata || typeof metadata !== 'object') {
              core.setFailed('Correction JSON must be an object');
              return;
            }
            const { anthology_id, title, authors } = metadata;
            if (typeof anthology_id !== 'string' || anthology_id === '' ||
                typeof title !== 'string' || !Array.isArray(authors)) {
              core.setFailed('Correction JSON must contain anthology_id, title and authors');
              return;
            }
            // Drop missing name parts so a single-name author is not rendered
            // with the literal text "undefined".
            const author_list = authors
              .map(author => [author?.first, author?.last].filter(Boolean).join(' '))
              .join(', ');
            // The title is exported verbatim: consumers read it from the
            // environment, so no quote escaping is needed.
            core.setOutput('anthology_id', anthology_id);
            core.setOutput('author_list', author_list);
            core.setOutput('title', title);
      # Outputs: response (validated JSON text with 'valid' and 'details').
      - name: Validate with OpenAI
        id: openai-validation
        uses: actions/github-script@v7
        env:
          OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
          OPENAI_ORG: ${{ secrets.OPENAI_ORG }}
          # Untrusted values from the issue body: passed as data, not code.
          ANTHOLOGY_ID: ${{ steps.extract-data.outputs.anthology_id }}
          AUTHOR_LIST: ${{ steps.extract-data.outputs.author_list }}
          TITLE: ${{ steps.extract-data.outputs.title }}
        with:
          github-token: ${{ secrets.GITHUB_TOKEN }}
          result-encoding: json
          script: |
            const axios = require('axios');
            const anthology_id = process.env.ANTHOLOGY_ID;
            const author_list = process.env.AUTHOR_LIST;
            const title = process.env.TITLE;
            // Encode the ID so it cannot alter the URL's path or query.
            const pdf_url = `https://www.aclanthology.org/${encodeURIComponent(anthology_id)}.pdf`;
            // NOTE(review): the request below sends only text to the chat
            // completions endpoint; confirm the model can actually access
            // the PDF at pdf_url rather than guessing its contents.
            try {
              const response = await axios.post(
                'https://api.openai.com/v1/chat/completions',
                {
                  model: 'gpt-4o-mini',
                  messages: [
                    {
                      role: "system",
                      content: "You are a sharp-eyed editor whose role is to validate proposed corrections to academic paper metadata. You are able to look at the first page of a PDF and determine the list of authors from reading order, as an person could, as well as to evaluate the title. You pay careful attention when comparing that information to any changes that are suggested."
                    },
                    {
                      role: 'user',
                      content: `Please download the PDF from ${pdf_url}, look at the title block on the first page, and then verify that its title and list of authors match the proposed corrections given below.\n\nFor the title corrections, please ignore formatting tags like <fixed-case>...</fixed-case> and focus on the words. It is also okay to have minor differences in title casing. The important thing is that the words are correct and complete.\n\nThe proposed title edit is: ${title}.\n\nFor the list of authors, please make sure that (1) the proposed list contains all authors, (2) each name is represented completely, and (3) the authors are in the correct order.\n\nThe proposed list of authors, given in a comma-separated list, is: ${author_list}.\n\nPlease return your result as a JSON structure with a single field, 'valid', a boolean, and a 'details' field, containing a list of detailed explanations for each of your decisions.`
                    }
                  ],
                  max_tokens: 1000,
                  temperature: 0.1,
                },
                {
                  headers: {
                    Authorization: `Bearer ${process.env.OPENAI_API_KEY}`,
                    'OpenAI-Organization': process.env.OPENAI_ORG,
                    'Content-Type': 'application/json'
                  }
                }
              );
              const response_text = response.data.choices[0].message.content;
              // Prefer a fenced json block; fall back to the whole reply so a
              // bare JSON answer is accepted as well.
              const jsonMatch = response_text.match(/```json\n([\s\S]*?)\n```/);
              const result = (jsonMatch ? jsonMatch[1] : response_text).trim();
              // Validate here so the next step always receives parseable JSON.
              try {
                JSON.parse(result);
              } catch (parseError) {
                throw new Error('No JSON found in OpenAI response');
              }
              core.setOutput('response', result);
              return result;
            } catch (error) {
              console.error('Error calling OpenAI:', error.response?.data || error.message);
              throw error;
            }
      # Creates the verdict comment, or updates the most recent one already
      # posted by github-actions[bot].
      - name: Comment validation results
        uses: actions/github-script@v7
        env:
          # Model output is untrusted text: parsed as JSON, never evaluated.
          VALIDATION_JSON: ${{ steps.openai-validation.outputs.response }}
        with:
          script: |
            const validation = JSON.parse(process.env.VALIDATION_JSON);
            let comment = validation.valid
              ? '✅ **LLM Validation Passed**\n\n'
              : '❌ **LLM Validation Failed**\n\n';
            comment += '**Details:**\n';
            // 'details' may be missing, a single value, or hold non-strings.
            const details = [].concat(validation.details ?? []);
            for (const detail of details) {
              const text = typeof detail === 'string' ? detail : JSON.stringify(detail);
              comment += `- ${text}\n`;
            }
            // Fetch every page so an earlier bot comment is found even on
            // issues with many comments.
            const comments = await github.paginate(github.rest.issues.listComments, {
              issue_number: context.issue.number,
              owner: context.repo.owner,
              repo: context.repo.repo
            });
            // look for existing comment to update (newest first)
            const validationComment = comments.reverse().find(c =>
              (c.body || '').includes('**LLM') && c.user?.login === 'github-actions[bot]'
            );
            if (validationComment) {
              await github.rest.issues.updateComment({
                comment_id: validationComment.id,
                owner: context.repo.owner,
                repo: context.repo.repo,
                body: comment
              });
            } else {
              await github.rest.issues.createComment({
                issue_number: context.issue.number,
                owner: context.repo.owner,
                repo: context.repo.repo,
                body: comment
              });
            }