From f3fd03a891f2c21353415597787315c58e79f6f4 Mon Sep 17 00:00:00 2001 From: Arathy Rose Date: Thu, 21 Mar 2019 19:28:28 +0530 Subject: [PATCH] Fixed issue #92 --- src/lab/exp4/CLExperiment4-0.php | 39 ++-- src/lab/exp4/CLExperiment4-1.php | 44 ++--- src/lab/exp4/CLExperiment4-2.php | 11 +- src/lab/exp4/CLExperiment4.php | 13 +- src/lab/exp4/Exp4/Hindi/empty.pi | 0 src/lab/exp4/content.html | 329 +++++++++++++++---------------- 6 files changed, 200 insertions(+), 236 deletions(-) delete mode 100644 src/lab/exp4/Exp4/Hindi/empty.pi diff --git a/src/lab/exp4/CLExperiment4-0.php b/src/lab/exp4/CLExperiment4-0.php index ebf25b9a..b95aa528 100644 --- a/src/lab/exp4/CLExperiment4-0.php +++ b/src/lab/exp4/CLExperiment4-0.php @@ -27,21 +27,19 @@ function selectSize()
".$file.""; - - +foreach ($file_list as $file) { + echo ''; } -echo "

"; +echo '

'; -echo ""; +echo ''; ?>
diff --git a/src/lab/exp4/CLExperiment4-1.php b/src/lab/exp4/CLExperiment4-1.php index 54170d2c..0d26aa43 100644 --- a/src/lab/exp4/CLExperiment4-1.php +++ b/src/lab/exp4/CLExperiment4-1.php @@ -34,18 +34,17 @@ function selectSize()
@@ -54,20 +53,17 @@ function selectSize() ".$file[0].""; - $file_id++; - +foreach ($files as $file) { + $file = explode('/', $file); + echo $file[3]; + $file = explode('.', $file[3]); + echo ''; + ++$file_id; } -echo "

"; -echo "
"; +echo '

'; +echo ''; ?>
diff --git a/src/lab/exp4/CLExperiment4-2.php b/src/lab/exp4/CLExperiment4-2.php index 7a66d936..1e717dd2 100644 --- a/src/lab/exp4/CLExperiment4-2.php +++ b/src/lab/exp4/CLExperiment4-2.php @@ -10,17 +10,14 @@
"; +echo "\"Error\""; ?>
diff --git a/src/lab/exp4/CLExperiment4.php b/src/lab/exp4/CLExperiment4.php index cc0c4f4c..1134ef06 100644 --- a/src/lab/exp4/CLExperiment4.php +++ b/src/lab/exp4/CLExperiment4.php @@ -25,13 +25,12 @@ function selectLang()
"; -echo ""; ?> +echo '
'; +echo ''; ?>

diff --git a/src/lab/exp4/Exp4/Hindi/empty.pi b/src/lab/exp4/Exp4/Hindi/empty.pi deleted file mode 100644 index e69de29b..00000000 diff --git a/src/lab/exp4/content.html b/src/lab/exp4/content.html index 9c983c40..ae46333e 100644 --- a/src/lab/exp4/content.html +++ b/src/lab/exp4/content.html @@ -8,195 +8,176 @@ + + -
+
+ - -
- - + +
- + Computational Linguistics Lab +
- - + - -
- - -
-
- -
- - Heap's Law - -
+ - - + + - -
+
- -
- -
- - -
- - +
+ +
+ + +
+ + -
- Introduction -
+
+ Introduction +
- -
-
-As we gather larger copora (more instances of tokens), the corresponding number of distinct types gets diminished as we exhaust the discovery of full vocabulary. This phenomenon can be explained by the Heap's law which is formulated as: -

V = f(n) = Knβ


-where V = types
-          n = tokens
-          K and β are free parameters determined empirically


- -
-
-
+
+
+ As we gather larger corpora (more instances of tokens), the number of new distinct types discovered diminishes as we exhaust the full vocabulary. This phenomenon is described by Heaps' law, which is formulated as: +

V = f(n) = K n^β


where V = types
          n = tokens
          K and β are free parameters determined + empirically


+ +
+
+
+ +
-
+ +
- -
- -
- - -
- - + +
+ + -
- Theory -
+
+ Theory +
- -
-

-

Heaps' Law


-The relation between types and tokens can be explained by the Heaps' law (also called Herdan's law) which describes the number of distinct words in a document (types) as a function of the document length(tokens). It can be formulated as:
-

V = f(n) = Knβ


+
+

+

Heaps' Law


The relation between types and tokens can be explained by Heaps' law (also called Herdan's law), which describes the number of distinct words in a document (types) as a function of the document + length (tokens). It can be formulated as:
+

V = f(n) = K n^β


where V = types
          n = tokens
          K and β are free parameters determined + empirically
          The values of the parameters depend on the language and with English text corpora, typically K is between 10 and 100, and β is between 0.4 and 0.6.

Taking logarithms of both sides of the equation, we get: +

log V = log K + β log n +


which is the equation of a straight line with intercept log K and slope β. Heaps' law implies that as we gather larger corpora (more instances of tokens), the number of new distinct types discovered diminishes as we exhaust + the full vocabulary. +

+
+ -where V = types
-          n = tokens
-          K and β are free parameters determined empirically
-          The values of the parameters depend on the language and with English text corpora, typically K is between 10 and 100, and β is between 0.4 and 0.6.

+
-Applying logarithmics to both sides of the equation, we get: -

logV = logK +βlogn


-which is an equation of a straight line with intercept logK and slope β. +
+ + +
-Heap's Law implies that as we gather larger copora (more instances of tokens), the number of distinct types returned gets diminished as we exhaust the discovery of full vocabulary. -

-
- - - -
- -
- - -
- -
- Objective -
+
+ Objective +
-
-

-The objective of this experiment is to understand the relation between types and tokens with increasing corpus size. -


-
+
+

The objective of this experiment is to understand the relation between types and tokens with increasing corpus size. +

+
+
-
+ -
+
-
- - -
+
+ + +
-
- Experiment -
+
+ Experiment +
-
-
-
+
+
+
-
+
- - -
- -
- - -
- -
- Procedure -
- -
-STEP1: Select the Language which you know better
-STEP2: Select the size of corpus

-OUTPUT: Observe the graphs to know heap's law better
-
- -
- - - + +
+ +
+ Procedure +
+ +
+ STEP1: Select the language you know best (the experiment currently supports only English)
+ STEP2: Select the size of corpus

+ OUTPUT: Observe the graphs to understand Heaps' law better
+
+ + + + + - - + - - + - -
- -
+ +
+ +
-
+ - - + - + - + + \ No newline at end of file