forked from donnekgit/autoglosser
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathjoin_tags.php
70 lines (60 loc) · 3.12 KB
/
join_tags.php
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
<?php
/*
*********************************************************************
Copyright Kevin Donnelly 2010, 2011.
kevindonnelly.org.uk
This file is part of the Bangor Autoglosser.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License or the GNU
Affero General Public License as published by the Free Software
Foundation, either version 3 of the License, or (at your option)
any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
and the GNU Affero General Public License along with this program.
If not, see <http://www.gnu.org/licenses/>.
*********************************************************************
*/
// This file concatenates the tags held in the $cgfinished table and writes the result into the auto column of the $words table.
// Stand-alone bootstrap: when $filename is empty (presumably because no calling
// script has already set it), load the helper functions and the configuration,
// then let get_filename() supply the file and table names used below.
// require is used instead of include because nothing further down can work
// without these two files: a missing file now stops the script immediately with
// a clear error instead of a warning followed by an unrelated failure later on.
if (empty($filename))
{
    require("includes/fns.php");
    require("/opt/autoglosser/config.php");
    list($chafile, $filename, $utterances, $words, $cgfinished)=get_filename();
}
// Concatenate the fields in the CG output file.
// Rows are ordered by surface and pos as well as by position, to guarantee that
// multiple readings will always appear in the same order - this is useful for
// tidying later.
$sql="select * from $cgfinished order by utterance_id, location, surface, pos";
$result=pg_query($db_handle,$sql) or die("Can't get the items");

// The update statement is identical for every row, so build it once. The values
// are passed as bound parameters ($1..$3) rather than being spliced into the SQL
// text, so no manual escaping is needed. The table name cannot be a parameter
// and is still interpolated.
$sql_u="update $words set auto=\$1 where utterance_id=\$2 and location=\$3";

// Position (utterance_id, location) of the previous row and the gloss built so
// far for it. Initialised explicitly so that the first row is never compared
// against undefined variables.
$utt=null;
$loc=null;
$auto="";

while ($row=pg_fetch_object($result))
{
    // Build the individual pieces of the gloss; empty fields contribute nothing.
    $enlemma=$row->enlemma.".";
    $pos=($row->pos=='') ? "" : $row->pos.".";
    $pos=preg_replace("/(\.archaic|\.amer|\.err|\.literary|\.north|\.nstan|\.pat|\.polite|\.short|\.vulg|\.spoken)/", "", $pos); // Remove value-judgement tags.
    $extra=($row->extra =='') ? "" : "+".$row->extra."."; // needs to be changed to = to follow the Leipzig glossing rules
    $seg=($row->seg =='') ? "" : "+".$row->seg; // needs to be changed to = to follow the Leipzig glossing rules

    $combined1=$pos.$extra.$seg;
    $combined2=strtoupper($combined1); // uppercase the POS-tags
    $tags=preg_replace('/\.\+/','+', $combined2); // remove the dot before a +

    // Lemma followed by its tags. The string is kept unescaped: the database
    // write below uses bound parameters.
    $lemtags=$enlemma.$tags;
    // remove the dot where there is no lemma
    // NOTE(review): the pattern is not anchored, so the dot before PRT is also
    // removed when a lemma is present - confirm that this is intended.
    $lemtags=preg_replace("/\.PRT/", "PRT", $lemtags);
    $lemtags=preg_replace('/\.$/','', $lemtags); // remove the dot at the end of the string

    // A row at the same position as the previous one is an alternative reading
    // of the same word. The null check keeps the very first row (or a row after
    // one with no position) from being treated as a repeat.
    if ($utt!==null && $loc!==null && $row->utterance_id==$utt && $row->location==$loc)
    {
        $auto=$auto.".[or].".$lemtags; // CLAN CHECK will not allow "text[or]text", but it will allow "text.[or].text", and this keeps the gloss for a particular word in one contiguous segment.
        echo "Repeat: ".$row->utterance_id.":".$row->location.": ".$auto."\n";
    }
    else
    {
        $auto=$lemtags;
        $utt=$row->utterance_id;
        $loc=$row->location;
        echo "New: ".$utt.":".$loc.": ".$auto."\n";
    }

    // Write them into the words table. This runs for every reading, so a word
    // with several readings is overwritten until the last, complete gloss is stored.
    $result_u=pg_query_params($db_handle,$sql_u,array($auto,$utt,$loc)) or die("Can't update");
}
?>