diff --git a/docs/build/doctrees/environment.pickle b/docs/build/doctrees/environment.pickle
index f68cc983..98746748 100644
Binary files a/docs/build/doctrees/environment.pickle and b/docs/build/doctrees/environment.pickle differ
diff --git a/docs/build/doctrees/examples.doctree b/docs/build/doctrees/examples.doctree
index d7b8bafb..e796ff97 100644
Binary files a/docs/build/doctrees/examples.doctree and b/docs/build/doctrees/examples.doctree differ
diff --git a/docs/build/doctrees/feature_builder.doctree b/docs/build/doctrees/feature_builder.doctree
index 320c024d..b5b51b9c 100644
Binary files a/docs/build/doctrees/feature_builder.doctree and b/docs/build/doctrees/feature_builder.doctree differ
diff --git a/docs/build/doctrees/features/temporal_features.doctree b/docs/build/doctrees/features/temporal_features.doctree
index d2688009..dfc89c91 100644
Binary files a/docs/build/doctrees/features/temporal_features.doctree and b/docs/build/doctrees/features/temporal_features.doctree differ
diff --git a/docs/build/doctrees/utils/calculate_chat_level_features.doctree b/docs/build/doctrees/utils/calculate_chat_level_features.doctree
index 4bd1d9ab..3ffb735b 100644
Binary files a/docs/build/doctrees/utils/calculate_chat_level_features.doctree and b/docs/build/doctrees/utils/calculate_chat_level_features.doctree differ
diff --git a/docs/build/doctrees/utils/check_embeddings.doctree b/docs/build/doctrees/utils/check_embeddings.doctree
index bb18043e..e51810d6 100644
Binary files a/docs/build/doctrees/utils/check_embeddings.doctree and b/docs/build/doctrees/utils/check_embeddings.doctree differ
diff --git a/docs/build/doctrees/utils/preprocess.doctree b/docs/build/doctrees/utils/preprocess.doctree
index 8f297d7a..fe5a32cc 100644
Binary files a/docs/build/doctrees/utils/preprocess.doctree and b/docs/build/doctrees/utils/preprocess.doctree differ
diff --git a/docs/build/html/_sources/examples.rst.txt b/docs/build/html/_sources/examples.rst.txt
index e349c7d4..bab74995 100644
--- a/docs/build/html/_sources/examples.rst.txt
+++ b/docs/build/html/_sources/examples.rst.txt
@@ -91,10 +91,10 @@ Now we are ready to call the FeatureBuilder on our data. All we need to do is de
 		speaker_id_col = "speaker_nickname",
 		message_col = "message",
 		timestamp_col = "timestamp",
-		grouping_keys = ["batch_num", "round_num"],
+		grouping_keys = ["batch_num", "round_num"], # NOTE: This example demonstrates grouping. Use conversation_id_col if you have a single conversation identifier.
 		vector_directory = "./vector_data/",
 		output_file_base = "jury_output",
-		turns = True
+		turns = True # NOTE: This defaults to False. Decide whether you want to combine successive 'utterances' by the same person as a 'turn.'
 	)
 	jury_feature_builder.featurize()
 
@@ -219,6 +219,12 @@ Regenerating Vector Cache
 
 	* By default, **we assume that, if your output file is named the same, that the underlying vectors are the same**. If this isn't true, you should set **regenerate_vectors = True** in order to clear out the cache and re-generate the RoBERTa and SBERT outputs.
 
+
+Generating Vectors using GPU
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+By default, we use the CPU to generate sentence vectors and cached RoBERTa sentiments. To override this default and use a GPU when available (which will speed up the computation of the vectors), set ``use_gpu`` to True.
+
+
 Custom Features
 ~~~~~~~~~~~~~~~~~
 
diff --git a/docs/build/html/examples.html b/docs/build/html/examples.html
index 075879bf..e1936257 100644
--- a/docs/build/html/examples.html
+++ b/docs/build/html/examples.html
@@ -158,10 +158,10 @@ <h3>Configuring the FeatureBuilder<a class="headerlink" href="#configuring-the-f
         <span class="n">speaker_id_col</span> <span class="o">=</span> <span class="s2">&quot;speaker_nickname&quot;</span><span class="p">,</span>
         <span class="n">message_col</span> <span class="o">=</span> <span class="s2">&quot;message&quot;</span><span class="p">,</span>
         <span class="n">timestamp_col</span> <span class="o">=</span> <span class="s2">&quot;timestamp&quot;</span><span class="p">,</span>
-        <span class="n">grouping_keys</span> <span class="o">=</span> <span class="p">[</span><span class="s2">&quot;batch_num&quot;</span><span class="p">,</span> <span class="s2">&quot;round_num&quot;</span><span class="p">],</span>
+        <span class="n">grouping_keys</span> <span class="o">=</span> <span class="p">[</span><span class="s2">&quot;batch_num&quot;</span><span class="p">,</span> <span class="s2">&quot;round_num&quot;</span><span class="p">],</span> <span class="c1"># NOTE: This example demonstrates grouping. Use conversation_id_col if you have a single conversation identifier.</span>
         <span class="n">vector_directory</span> <span class="o">=</span> <span class="s2">&quot;./vector_data/&quot;</span><span class="p">,</span>
         <span class="n">output_file_base</span> <span class="o">=</span> <span class="s2">&quot;jury_output&quot;</span><span class="p">,</span>
-        <span class="n">turns</span> <span class="o">=</span> <span class="kc">True</span>
+        <span class="n">turns</span> <span class="o">=</span> <span class="kc">True</span> <span class="c1"># NOTE: This defaults to False. Decide whether you want to combine successive &#39;utterances&#39; by the same person as a &#39;turn.&#39;</span>
 <span class="p">)</span>
 <span class="n">jury_feature_builder</span><span class="o">.</span><span class="n">featurize</span><span class="p">()</span>
 </pre></div>
@@ -302,6 +302,10 @@ <h5>Regenerating Vector Cache<a class="headerlink" href="#regenerating-vector-ca
 </li>
 </ul>
 </section>
+<section id="generating-vectors-using-gpu">
+<h5>Generating Vectors using GPU<a class="headerlink" href="#generating-vectors-using-gpu" title="Link to this heading"></a></h5>
+<p>By default, we use the CPU to generate sentence vectors and cached RoBERTa sentiments. To override this default and use a GPU when available (which will speed up the computation of the vectors), set <code class="docutils literal notranslate"><span class="pre">use_gpu</span></code> to True.</p>
+</section>
 <section id="custom-features">
 <h5>Custom Features<a class="headerlink" href="#custom-features" title="Link to this heading"></a></h5>
 <ul>
diff --git a/docs/build/html/feature_builder.html b/docs/build/html/feature_builder.html
index 489c2dfd..d6f474d0 100644
--- a/docs/build/html/feature_builder.html
+++ b/docs/build/html/feature_builder.html
@@ -63,6 +63,7 @@
 <li class="toctree-l3"><a class="reference internal" href="#feature_builder.FeatureBuilder.save_features"><code class="docutils literal notranslate"><span class="pre">FeatureBuilder.save_features()</span></code></a></li>
 <li class="toctree-l3"><a class="reference internal" href="#feature_builder.FeatureBuilder.set_self_conv_data"><code class="docutils literal notranslate"><span class="pre">FeatureBuilder.set_self_conv_data()</span></code></a></li>
 <li class="toctree-l3"><a class="reference internal" href="#feature_builder.FeatureBuilder.user_level_features"><code class="docutils literal notranslate"><span class="pre">FeatureBuilder.user_level_features()</span></code></a></li>
+<li class="toctree-l3"><a class="reference internal" href="#feature_builder.FeatureBuilder.verify_timestamp_format"><code class="docutils literal notranslate"><span class="pre">FeatureBuilder.verify_timestamp_format()</span></code></a></li>
 </ul>
 </li>
 </ul>
@@ -98,7 +99,7 @@
 <span id="feature-builder-module"></span><span id="feature-builder"></span><h1>feature_builder module<a class="headerlink" href="#module-feature_builder" title="Link to this heading"></a></h1>
 <dl class="py class">
 <dt class="sig sig-object py" id="feature_builder.FeatureBuilder">
-<em class="property"><span class="pre">class</span><span class="w"> </span></em><span class="sig-prename descclassname"><span class="pre">feature_builder.</span></span><span class="sig-name descname"><span class="pre">FeatureBuilder</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">input_df</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">DataFrame</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">vector_directory</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">str</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">'./vector_data/'</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">output_file_base</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">str</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">'output'</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">output_file_path_chat_level</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">str</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">output_file_path_user_level</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">str</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span 
class="default_value"><span class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">output_file_path_conv_level</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">str</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">custom_features</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">list</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">[]</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">analyze_first_pct</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">list</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">[1.0]</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">turns</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">bool</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">False</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">conversation_id_col</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">str</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">'conversation_num'</span></span></em>, <em 
class="sig-param"><span class="n"><span class="pre">speaker_id_col</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">str</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">'speaker_nickname'</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">message_col</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">str</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">'message'</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">timestamp_col</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">str</span><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">tuple</span><span class="p"><span class="pre">[</span></span><span class="pre">str</span><span class="p"><span class="pre">,</span></span><span class="w"> </span><span class="pre">str</span><span class="p"><span class="pre">]</span></span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">'timestamp'</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">timestamp_unit</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">'ms'</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">grouping_keys</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">list</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span 
class="w"> </span><span class="default_value"><span class="pre">[]</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">cumulative_grouping</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">False</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">within_task</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">False</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">ner_training_df</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">DataFrame</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">ner_cutoff</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">int</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">0.9</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">regenerate_vectors</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">bool</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">False</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">compute_vectors_from_preprocessed</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">bool</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span 
class="pre">False</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">custom_liwc_dictionary_path</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">str</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">''</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">convo_aggregation</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">True</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">convo_methods</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">list</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">['mean',</span> <span class="pre">'max',</span> <span class="pre">'min',</span> <span class="pre">'stdev']</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">convo_columns</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">list</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">user_aggregation</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">True</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">user_methods</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">list</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> 
</span><span class="default_value"><span class="pre">['mean',</span> <span class="pre">'max',</span> <span class="pre">'min',</span> <span class="pre">'stdev']</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">user_columns</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">list</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">None</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#feature_builder.FeatureBuilder" title="Link to this definition"></a></dt>
+<em class="property"><span class="pre">class</span><span class="w"> </span></em><span class="sig-prename descclassname"><span class="pre">feature_builder.</span></span><span class="sig-name descname"><span class="pre">FeatureBuilder</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">input_df</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">DataFrame</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">vector_directory</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">str</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">'./vector_data/'</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">output_file_base</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">str</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">'output'</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">output_file_path_chat_level</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">str</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">output_file_path_user_level</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">str</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span 
class="default_value"><span class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">output_file_path_conv_level</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">str</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">custom_features</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">list</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">[]</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">analyze_first_pct</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">list</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">[1.0]</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">turns</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">bool</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">False</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">conversation_id_col</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">str</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">'conversation_num'</span></span></em>, <em 
class="sig-param"><span class="n"><span class="pre">speaker_id_col</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">str</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">'speaker_nickname'</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">message_col</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">str</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">'message'</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">timestamp_col</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">str</span><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">tuple</span><span class="p"><span class="pre">[</span></span><span class="pre">str</span><span class="p"><span class="pre">,</span></span><span class="w"> </span><span class="pre">str</span><span class="p"><span class="pre">]</span></span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">'timestamp'</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">timestamp_unit</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">'ms'</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">grouping_keys</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">list</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span 
class="w"> </span><span class="default_value"><span class="pre">[]</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">cumulative_grouping</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">False</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">within_task</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">False</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">ner_training_df</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">DataFrame</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">ner_cutoff</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">int</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">0.9</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">regenerate_vectors</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">bool</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">False</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">compute_vectors_from_preprocessed</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">bool</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span 
class="pre">False</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">custom_liwc_dictionary_path</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">str</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">''</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">convo_aggregation</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">True</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">convo_methods</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">list</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">['mean',</span> <span class="pre">'max',</span> <span class="pre">'min',</span> <span class="pre">'stdev']</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">convo_columns</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">list</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">user_aggregation</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">True</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">user_methods</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">list</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> 
</span><span class="default_value"><span class="pre">['mean',</span> <span class="pre">'max',</span> <span class="pre">'min',</span> <span class="pre">'stdev']</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">user_columns</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">list</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">use_gpu</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">bool</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">False</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#feature_builder.FeatureBuilder" title="Link to this definition"></a></dt>
 <dd><p>Bases: <code class="xref py py-class docutils literal notranslate"><span class="pre">object</span></code></p>
 <p>The FeatureBuilder is the main engine that reads in the user’s inputs and specifications and generates
 conversational features. The FeatureBuilder separately calls the classes
@@ -163,6 +164,7 @@
 Defaults to [‘mean’, ‘max’, ‘min’, ‘stdev’].</p></li>
 <li><p><strong>user_columns</strong> (<em>list</em><em>, </em><em>optional</em>) – Specifies which columns (at the utterance/chat level) to aggregate for the
 speaker/user level. Defaults to all numeric columns.</p></li>
+<li><p><strong>use_gpu</strong> (<em>bool</em><em>, </em><em>optional</em>) – Specifies whether to use a GPU, when available, to generate the SBERT vectors and RoBERTa sentiments. Defaults to False.</p></li>
 </ul>
 </dd>
 <dt class="field-even">Returns<span class="colon">:</span></dt>
@@ -350,6 +352,27 @@
 </dl>
 </dd></dl>
 
+<dl class="py method">
+<dt class="sig sig-object py" id="feature_builder.FeatureBuilder.verify_timestamp_format">
+<span class="sig-name descname"><span class="pre">verify_timestamp_format</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">timestamp_col</span></span></em><span class="sig-paren">)</span> <span class="sig-return"><span class="sig-return-icon">&#x2192;</span> <span class="sig-return-typehint"><span class="pre">None</span></span></span><a class="headerlink" href="#feature_builder.FeatureBuilder.verify_timestamp_format" title="Link to this definition"></a></dt>
+<dd><p>Verifies that a column in a DataFrame is composed of values that can be parsed
+either as datetime or as numeric values suitable for time difference calculations.</p>
+<dl class="field-list simple">
+<dt class="field-odd">Parameters<span class="colon">:</span></dt>
+<dd class="field-odd"><p><strong>timestamp_col</strong> (<em>str</em>) – The name of the column to verify</p>
+</dd>
+<dt class="field-even">Returns<span class="colon">:</span></dt>
+<dd class="field-even"><p>None</p>
+</dd>
+<dt class="field-odd">Return type<span class="colon">:</span></dt>
+<dd class="field-odd"><p>None</p>
+</dd>
+<dt class="field-even">Raises<span class="colon">:</span></dt>
+<dd class="field-even"><p><strong>ValueError</strong> – If the column contains values that cannot be parsed as datetime or numeric.</p>
+</dd>
+</dl>
+</dd></dl>
+
 </dd></dl>
 
 </section>
diff --git a/docs/build/html/features/temporal_features.html b/docs/build/html/features/temporal_features.html
index d2f6d850..0e073f57 100644
--- a/docs/build/html/features/temporal_features.html
+++ b/docs/build/html/features/temporal_features.html
@@ -60,7 +60,6 @@
 <li class="toctree-l3"><a class="reference internal" href="politeness_features.html">politeness_features module</a></li>
 <li class="toctree-l3"><a class="reference internal" href="hedge.html">hedge module</a></li>
 <li class="toctree-l3 current"><a class="current reference internal" href="#">temporal_features module</a><ul>
-<li class="toctree-l4"><a class="reference internal" href="#features.temporal_features.coerce_to_date_or_number"><code class="docutils literal notranslate"><span class="pre">coerce_to_date_or_number()</span></code></a></li>
 <li class="toctree-l4"><a class="reference internal" href="#features.temporal_features.get_time_diff"><code class="docutils literal notranslate"><span class="pre">get_time_diff()</span></code></a></li>
 <li class="toctree-l4"><a class="reference internal" href="#features.temporal_features.get_time_diff_startend"><code class="docutils literal notranslate"><span class="pre">get_time_diff_startend()</span></code></a></li>
 </ul>
@@ -111,21 +110,6 @@
              
   <section id="module-features.temporal_features">
 <span id="temporal-features-module"></span><h1>temporal_features module<a class="headerlink" href="#module-features.temporal_features" title="Link to this heading"></a></h1>
-<dl class="py function">
-<dt class="sig sig-object py" id="features.temporal_features.coerce_to_date_or_number">
-<span class="sig-prename descclassname"><span class="pre">features.temporal_features.</span></span><span class="sig-name descname"><span class="pre">coerce_to_date_or_number</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">value</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#features.temporal_features.coerce_to_date_or_number" title="Link to this definition"></a></dt>
-<dd><p>Helper function in which we check that the timestamp column contains either a datetime value or a number
-that can be interpreted as a time elapsed; otherwise, sets it equal to none.</p>
-<dl class="field-list simple">
-<dt class="field-odd">Parameters<span class="colon">:</span></dt>
-<dd class="field-odd"><p><strong>value</strong> – The value to check; type can be anything</p>
-</dd>
-<dt class="field-even">Returns<span class="colon">:</span></dt>
-<dd class="field-even"><p>Either the value itself (if it is a valid timestamp value) or None otherwise</p>
-</dd>
-</dl>
-</dd></dl>
-
 <dl class="py function">
 <dt class="sig sig-object py" id="features.temporal_features.get_time_diff">
 <span class="sig-prename descclassname"><span class="pre">features.temporal_features.</span></span><span class="sig-name descname"><span class="pre">get_time_diff</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">df</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">on_column</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">conversation_id_col</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">timestamp_unit</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#features.temporal_features.get_time_diff" title="Link to this definition"></a></dt>
diff --git a/docs/build/html/genindex.html b/docs/build/html/genindex.html
index f5b21ec3..4c705366 100644
--- a/docs/build/html/genindex.html
+++ b/docs/build/html/genindex.html
@@ -96,6 +96,7 @@ <h1 id="index">Index</h1>
  | <a href="#S"><strong>S</strong></a>
  | <a href="#T"><strong>T</strong></a>
  | <a href="#U"><strong>U</strong></a>
+ | <a href="#V"><strong>V</strong></a>
  | <a href="#W"><strong>W</strong></a>
  
 </div>
@@ -106,8 +107,6 @@ <h2 id="A">A</h2>
 </li>
   </ul></td>
   <td style="width: 33%; vertical-align: top;"><ul>
-      <li><a href="utils/preprocess.html#utils.preprocess.assert_key_columns_present">assert_key_columns_present() (in module utils.preprocess)</a>
-</li>
       <li><a href="utils/assign_chunk_nums.html#utils.assign_chunk_nums.assign_chunk_nums">assign_chunk_nums() (in module utils.assign_chunk_nums)</a>
 </li>
   </ul></td>
@@ -169,8 +168,6 @@ <h2 id="C">C</h2>
       <li><a href="features/readability.html#features.readability.classify_text_dalechall">classify_text_dalechall() (in module features.readability)</a>
 </li>
       <li><a href="features/politeness_v2_helper.html#features.politeness_v2_helper.clean_text">clean_text() (in module features.politeness_v2_helper)</a>
-</li>
-      <li><a href="features/temporal_features.html#features.temporal_features.coerce_to_date_or_number">coerce_to_date_or_number() (in module features.temporal_features)</a>
 </li>
       <li><a href="features/politeness_v2_helper.html#features.politeness_v2_helper.commit_data">commit_data() (in module features.politeness_v2_helper)</a>
 </li>
@@ -186,10 +183,10 @@ <h2 id="C">C</h2>
 </li>
       <li><a href="features/politeness_v2_helper.html#features.politeness_v2_helper.conjection_seperator">conjection_seperator() (in module features.politeness_v2_helper)</a>
 </li>
-  </ul></td>
-  <td style="width: 33%; vertical-align: top;"><ul>
       <li><a href="features/word_mimicry.html#features.word_mimicry.Content_mimicry_score">Content_mimicry_score() (in module features.word_mimicry)</a>
 </li>
+  </ul></td>
+  <td style="width: 33%; vertical-align: top;"><ul>
       <li><a href="features/word_mimicry.html#features.word_mimicry.Content_mimicry_score_per_conv">Content_mimicry_score_per_conv() (in module features.word_mimicry)</a>
 </li>
       <li><a href="feature_builder.html#feature_builder.FeatureBuilder.conv_level_features">conv_level_features() (feature_builder.FeatureBuilder method)</a>
@@ -932,6 +929,14 @@ <h2 id="U">U</h2>
   </ul></td>
 </tr></table>
 
+<h2 id="V">V</h2>
+<table style="width: 100%" class="indextable genindextable"><tr>
+  <td style="width: 33%; vertical-align: top;"><ul>
+      <li><a href="feature_builder.html#feature_builder.FeatureBuilder.verify_timestamp_format">verify_timestamp_format() (feature_builder.FeatureBuilder method)</a>
+</li>
+  </ul></td>
+</tr></table>
+
 <h2 id="W">W</h2>
 <table style="width: 100%" class="indextable genindextable"><tr>
   <td style="width: 33%; vertical-align: top;"><ul>
diff --git a/docs/build/html/objects.inv b/docs/build/html/objects.inv
index 511596ab..cd350844 100644
Binary files a/docs/build/html/objects.inv and b/docs/build/html/objects.inv differ
diff --git a/docs/build/html/searchindex.js b/docs/build/html/searchindex.js
index 3718c59f..f5ab94f6 100644
--- a/docs/build/html/searchindex.js
+++ b/docs/build/html/searchindex.js
@@ -1 +1 @@
-Search.setIndex({"alltitles": {"A Light-Touch, One-Function Package": [[0, "a-light-touch-one-function-package"]], "Additional FeatureBuilder Considerations": [[1, "additional-featurebuilder-considerations"]], "Advanced Configuration Columns": [[1, "advanced-configuration-columns"]], "Aggregation Overview": [[1, "id2"]], "Analyzing First Percentage (%)": [[1, "analyzing-first-percentage"]], "Base Conversation-Level Features": [[11, "base-conversation-level-features"]], "Basic Input Columns": [[1, "basic-input-columns"]], "Certainty": [[30, null]], "Citation": [[29, "citation"], [30, "citation"], [31, "citation"], [32, "citation"], [33, "citation"], [34, "citation"], [35, "citation"], [36, "citation"], [37, "citation"], [38, "citation"], [40, "citation"], [41, "citation"], [42, "citation"], [43, "citation"], [44, "citation"], [45, "citation"], [46, "citation"], [47, "citation"], [48, "citation"], [49, "citation"], [50, "citation"], [51, "citation"], [52, "citation"], [53, "citation"], [54, "citation"], [55, "citation"], [56, "citation"], [57, "citation"], [58, "citation"], [59, "citation"], [60, "citation"]], "Configuring the FeatureBuilder": [[1, "configuring-the-featurebuilder"]], "Content Word Accommodation": [[31, null]], "Contents:": [[61, null]], "Conversation Parameters": [[1, "conversation-parameters"]], "Conversation-Level Aggregates": [[11, "conversation-level-aggregates"]], "Conversation-Level Features": [[11, "conversation-level-features"], [39, "conversation-level-features"]], "Conversational Repair": [[32, null]], "Cumulative Grouping": [[1, "cumulative-grouping"]], "Custom Aggregation": [[1, "custom-aggregation"]], "Custom Features": [[1, "custom-features"]], "Customizable Parameters": [[0, "customizable-parameters"]], "Dale-Chall Score": [[33, null]], "Declaring a FeatureBuilder": [[61, "declaring-a-featurebuilder"]], "Demo / Sample Code": [[0, "demo-sample-code"], [1, "demo-sample-code"]], "Discursive Diversity": [[34, null]], "Example Usage of 
Custom Aggregation Parameters": [[1, "example-usage-of-custom-aggregation-parameters"]], "Example:": [[41, "example"]], "FEATURE NAME": [[29, null]], "Feature Column Names": [[1, "feature-column-names"], [61, "feature-column-names"]], "Feature Documentation": [[62, "feature-documentation"]], "Feature Information": [[1, "feature-information"], [61, "feature-information"]], "Features: Conceptual Documentation": [[39, null]], "Features: Technical Documentation": [[11, null]], "Forward Flow": [[35, null]], "Function Word Accommodation": [[36, null]], "Generating Features: Utterance-, Speaker-, and Conversation-Level": [[62, "generating-features-utterance-speaker-and-conversation-level"]], "Getting Started": [[1, "getting-started"], [61, "getting-started"], [62, "getting-started"]], "Gini Coefficient": [[37, null]], "Hedge": [[38, null]], "High*Level Intuition": [[54, "high-level-intuition"]], "High-Level Intuition": [[29, "high-level-intuition"], [30, "high-level-intuition"], [31, "high-level-intuition"], [32, "high-level-intuition"], [33, "high-level-intuition"], [34, "high-level-intuition"], [35, "high-level-intuition"], [36, "high-level-intuition"], [37, "high-level-intuition"], [38, "high-level-intuition"], [40, "high-level-intuition"], [41, "high-level-intuition"], [42, "high-level-intuition"], [43, "high-level-intuition"], [44, "high-level-intuition"], [45, "high-level-intuition"], [46, "high-level-intuition"], [47, "high-level-intuition"], [48, "high-level-intuition"], [49, "high-level-intuition"], [50, "high-level-intuition"], [51, "high-level-intuition"], [52, "high-level-intuition"], [53, "high-level-intuition"], [55, "high-level-intuition"], [56, "high-level-intuition"], [57, "high-level-intuition"], [58, "high-level-intuition"], [59, "high-level-intuition"], [60, "high-level-intuition"]], "Implementation": [[32, "implementation"], [42, "implementation"], [52, "implementation"], [54, "implementation"]], "Implementation Basics": [[29, 
"implementation-basics"], [30, "implementation-basics"], [31, "implementation-basics"], [33, "implementation-basics"], [34, "implementation-basics"], [35, "implementation-basics"], [36, "implementation-basics"], [37, "implementation-basics"], [38, "implementation-basics"], [40, "implementation-basics"], [41, "implementation-basics"], [43, "implementation-basics"], [44, "implementation-basics"], [45, "implementation-basics"], [46, "implementation-basics"], [47, "implementation-basics"], [48, "implementation-basics"], [49, "implementation-basics"], [50, "implementation-basics"], [51, "implementation-basics"], [53, "implementation-basics"], [55, "implementation-basics"], [56, "implementation-basics"], [57, "implementation-basics"], [58, "implementation-basics"], [59, "implementation-basics"], [60, "implementation-basics"]], "Implementation Notes/Caveats": [[29, "implementation-notes-caveats"], [30, "implementation-notes-caveats"], [31, "implementation-notes-caveats"], [33, "implementation-notes-caveats"], [34, "implementation-notes-caveats"], [35, "implementation-notes-caveats"], [36, "implementation-notes-caveats"], [38, "implementation-notes-caveats"], [40, "implementation-notes-caveats"], [41, "implementation-notes-caveats"], [43, "implementation-notes-caveats"], [44, "implementation-notes-caveats"], [45, "implementation-notes-caveats"], [46, "implementation-notes-caveats"], [47, "implementation-notes-caveats"], [48, "implementation-notes-caveats"], [49, "implementation-notes-caveats"], [50, "implementation-notes-caveats"], [51, "implementation-notes-caveats"], [53, "implementation-notes-caveats"], [55, "implementation-notes-caveats"], [56, "implementation-notes-caveats"], [57, "implementation-notes-caveats"], [58, "implementation-notes-caveats"], [59, "implementation-notes-caveats"]], "Import Recommendations: Virtual Environment and Pip": [[1, "import-recommendations-virtual-environment-and-pip"], [61, "import-recommendations-virtual-environment-and-pip"]], 
"Important Notes and Caveats": [[1, "important-notes-and-caveats"]], "Importing the Package": [[1, "importing-the-package"]], "Indices and Tables": [[61, "indices-and-tables"]], "Information Diversity": [[40, null]], "Information Exchange": [[41, null]], "Input File": [[34, "id2"]], "Inspecting Generated Features": [[1, "inspecting-generated-features"], [61, "inspecting-generated-features"]], "Interpretation:": [[41, "interpretation"]], "Interpreting the Feature": [[29, "interpreting-the-feature"], [30, "interpreting-the-feature"], [31, "interpreting-the-feature"], [32, "interpreting-the-feature"], [33, "interpreting-the-feature"], [34, "interpreting-the-feature"], [35, "interpreting-the-feature"], [36, "interpreting-the-feature"], [37, "interpreting-the-feature"], [38, "interpreting-the-feature"], [40, "interpreting-the-feature"], [41, "interpreting-the-feature"], [42, "interpreting-the-feature"], [43, "interpreting-the-feature"], [44, "interpreting-the-feature"], [45, "interpreting-the-feature"], [46, "interpreting-the-feature"], [47, "interpreting-the-feature"], [48, "interpreting-the-feature"], [49, "interpreting-the-feature"], [50, "interpreting-the-feature"], [51, "interpreting-the-feature"], [52, "interpreting-the-feature"], [53, "interpreting-the-feature"], [54, "interpreting-the-feature"], [55, "interpreting-the-feature"], [56, "interpreting-the-feature"], [57, "interpreting-the-feature"], [58, "interpreting-the-feature"], [59, "interpreting-the-feature"], [60, "interpreting-the-feature"]], "Introduction": [[62, null]], "Key Assumptions and Parameters": [[0, "key-assumptions-and-parameters"]], "Linguistic Inquiry and Word Count (LIWC) and Other Lexicons": [[42, null]], "Message Length": [[43, null]], "Message Quantity": [[44, null]], "Mimicry (BERT)": [[45, null]], "Motivation": [[62, "motivation"]], "Moving Mimicry": [[46, null]], "Named Entity Recognition": [[1, "named-entity-recognition"], [47, null]], "Named Entity Training Examples": [[47, "id2"]], 
"New in v.1.0.5: \u201cBring Your Own LIWC\u201d Custom Lexicon": [[42, "new-in-v-1-0-5-bring-your-own-liwc-custom-lexicon"]], "Online Discussion Tags": [[48, null]], "Other Utilities": [[69, "other-utilities"]], "Ouput File": [[34, "id3"]], "Our Team": [[62, "our-team"]], "Output File": [[30, "id2"], [35, "id2"], [45, "id2"], [46, "id2"], [47, "id3"], [51, "id1"]], "Output File Naming Details": [[1, "output-file-naming-details"]], "Package Assumptions": [[0, "package-assumptions"]], "Politeness Strategies": [[50, null]], "Politeness/Receptiveness Markers": [[49, null]], "Positivity Z-Score": [[52, null]], "Proportion of First Person Pronouns": [[53, null]], "Question (Naive)": [[54, null]], "Regenerating Vector Cache": [[1, "regenerating-vector-cache"]], "Related Features": [[29, "related-features"], [30, "related-features"], [31, "related-features"], [32, "related-features"], [33, "related-features"], [34, "related-features"], [35, "related-features"], [36, "related-features"], [37, "related-features"], [38, "related-features"], [40, "related-features"], [41, "related-features"], [42, "related-features"], [43, "related-features"], [44, "related-features"], [45, "related-features"], [46, "related-features"], [47, "related-features"], [48, "related-features"], [49, "related-features"], [50, "related-features"], [51, "related-features"], [52, "related-features"], [53, "related-features"], [54, "related-features"], [55, "related-features"], [56, "related-features"], [57, "related-features"], [58, "related-features"], [59, "related-features"], [60, "related-features"]], "Sentiment (RoBERTa)": [[51, null]], "Speaker Turn Counts": [[59, "id2"]], "Speaker- (User) Level Features": [[11, "speaker-user-level-features"]], "Table of Contents": [[61, "table-of-contents"]], "Team Burstiness": [[55, null]], "Textblob Polarity": [[56, null]], "Textblob Subjectivity": [[57, null]], "The Basics (Get Started Here!)": [[0, null]], "The FeatureBuilder": [[62, "the-featurebuilder"]], 
"The Team Communication Toolkit": [[61, null]], "Time Difference": [[58, null]], "Troubleshooting": [[1, "troubleshooting"], [61, "troubleshooting"]], "Turn Taking Index": [[59, null]], "Turns": [[1, "turns"]], "Using the Package": [[61, "using-the-package"]], "Utilities": [[69, null]], "Utterance- (Chat) Level Features": [[11, "utterance-chat-level-features"], [39, "utterance-chat-level-features"]], "Vector Directory": [[1, "vector-directory"]], "Walkthrough: Running the FeatureBuilder on Your Data": [[1, "walkthrough-running-the-featurebuilder-on-your-data"]], "Word Type-Token Ratio": [[60, null]], "Worked Example": [[1, null]], "assign_chunk_nums module": [[63, null]], "basic_features module": [[3, null]], "burstiness module": [[4, null]], "calculate_chat_level_features module": [[64, null]], "calculate_conversation_level_features module": [[65, null]], "calculate_user_level_features module": [[66, null]], "certainty module": [[5, null]], "check_embeddings module": [[67, null]], "discursive_diversity module": [[6, null]], "feature_builder module": [[2, null]], "fflow module": [[7, null]], "get_all_DD_features module": [[8, null]], "get_user_network module": [[9, null]], "gini_coefficient module": [[68, null]], "hedge module": [[10, null]], "info_exchange_zscore module": [[12, null]], "information_diversity module": [[13, null]], "lexical_features_v2 module": [[14, null]], "named_entity_recognition_features module": [[15, null]], "other_lexical_features module": [[16, null]], "politeness_features module": [[17, null]], "politeness_v2 module": [[18, null]], "politeness_v2_helper module": [[19, null]], "preload_word_lists module": [[70, null]], "preprocess module": [[71, null]], "question_num module": [[20, null]], "readability module": [[21, null]], "reddit_tags module": [[22, null]], "summarize_features module": [[72, null]], "temporal_features module": [[23, null]], "textblob_sentiment_analysis module": [[24, null]], "turn_taking_features module": [[25, null]], 
"variance_in_DD module": [[26, null]], "within_person_discursive_range module": [[27, null]], "word_mimicry module": [[28, null]], "z-scores:": [[41, "z-scores"]], "zscore_chats_and_conversation module": [[73, null]], "\u201cDriver\u201d Classes: Utterance-, Conversation-, and Speaker-Level Features": [[69, "driver-classes-utterance-conversation-and-speaker-level-features"]]}, "docnames": ["basics", "examples", "feature_builder", "features/basic_features", "features/burstiness", "features/certainty", "features/discursive_diversity", "features/fflow", "features/get_all_DD_features", "features/get_user_network", "features/hedge", "features/index", "features/info_exchange_zscore", "features/information_diversity", "features/lexical_features_v2", "features/named_entity_recognition_features", "features/other_lexical_features", "features/politeness_features", "features/politeness_v2", "features/politeness_v2_helper", "features/question_num", "features/readability", "features/reddit_tags", "features/temporal_features", "features/textblob_sentiment_analysis", "features/turn_taking_features", "features/variance_in_DD", "features/within_person_discursive_range", "features/word_mimicry", "features_conceptual/TEMPLATE", "features_conceptual/certainty", "features_conceptual/content_word_accommodation", "features_conceptual/conversational_repair", "features_conceptual/dale_chall_score", "features_conceptual/discursive_diversity", "features_conceptual/forward_flow", "features_conceptual/function_word_accommodation", "features_conceptual/gini_coefficient", "features_conceptual/hedge", "features_conceptual/index", "features_conceptual/information_diversity", "features_conceptual/information_exchange", "features_conceptual/liwc", "features_conceptual/message_length", "features_conceptual/message_quantity", "features_conceptual/mimicry_bert", "features_conceptual/moving_mimicry", "features_conceptual/named_entity_recognition", "features_conceptual/online_discussions_tags", 
"features_conceptual/politeness_receptiveness_markers", "features_conceptual/politeness_strategies", "features_conceptual/positivity_bert", "features_conceptual/positivity_z_score", "features_conceptual/proportion_of_first_person_pronouns", "features_conceptual/questions", "features_conceptual/team_burstiness", "features_conceptual/textblob_polarity", "features_conceptual/textblob_subjectivity", "features_conceptual/time_difference", "features_conceptual/turn_taking_index", "features_conceptual/word_ttr", "index", "intro", "utils/assign_chunk_nums", "utils/calculate_chat_level_features", "utils/calculate_conversation_level_features", "utils/calculate_user_level_features", "utils/check_embeddings", "utils/gini_coefficient", "utils/index", "utils/preload_word_lists", "utils/preprocess", "utils/summarize_features", "utils/zscore_chats_and_conversation"], "envversion": {"sphinx": 62, "sphinx.domains.c": 3, "sphinx.domains.changeset": 1, "sphinx.domains.citation": 1, "sphinx.domains.cpp": 9, "sphinx.domains.index": 1, "sphinx.domains.javascript": 3, "sphinx.domains.math": 2, "sphinx.domains.python": 4, "sphinx.domains.rst": 2, "sphinx.domains.std": 2}, "filenames": ["basics.rst", "examples.rst", "feature_builder.rst", "features/basic_features.rst", "features/burstiness.rst", "features/certainty.rst", "features/discursive_diversity.rst", "features/fflow.rst", "features/get_all_DD_features.rst", "features/get_user_network.rst", "features/hedge.rst", "features/index.rst", "features/info_exchange_zscore.rst", "features/information_diversity.rst", "features/lexical_features_v2.rst", "features/named_entity_recognition_features.rst", "features/other_lexical_features.rst", "features/politeness_features.rst", "features/politeness_v2.rst", "features/politeness_v2_helper.rst", "features/question_num.rst", "features/readability.rst", "features/reddit_tags.rst", "features/temporal_features.rst", "features/textblob_sentiment_analysis.rst", "features/turn_taking_features.rst", 
"features/variance_in_DD.rst", "features/within_person_discursive_range.rst", "features/word_mimicry.rst", "features_conceptual/TEMPLATE.rst", "features_conceptual/certainty.rst", "features_conceptual/content_word_accommodation.rst", "features_conceptual/conversational_repair.rst", "features_conceptual/dale_chall_score.rst", "features_conceptual/discursive_diversity.rst", "features_conceptual/forward_flow.rst", "features_conceptual/function_word_accommodation.rst", "features_conceptual/gini_coefficient.rst", "features_conceptual/hedge.rst", "features_conceptual/index.rst", "features_conceptual/information_diversity.rst", "features_conceptual/information_exchange.rst", "features_conceptual/liwc.rst", "features_conceptual/message_length.rst", "features_conceptual/message_quantity.rst", "features_conceptual/mimicry_bert.rst", "features_conceptual/moving_mimicry.rst", "features_conceptual/named_entity_recognition.rst", "features_conceptual/online_discussions_tags.rst", "features_conceptual/politeness_receptiveness_markers.rst", "features_conceptual/politeness_strategies.rst", "features_conceptual/positivity_bert.rst", "features_conceptual/positivity_z_score.rst", "features_conceptual/proportion_of_first_person_pronouns.rst", "features_conceptual/questions.rst", "features_conceptual/team_burstiness.rst", "features_conceptual/textblob_polarity.rst", "features_conceptual/textblob_subjectivity.rst", "features_conceptual/time_difference.rst", "features_conceptual/turn_taking_index.rst", "features_conceptual/word_ttr.rst", "index.rst", "intro.rst", "utils/assign_chunk_nums.rst", "utils/calculate_chat_level_features.rst", "utils/calculate_conversation_level_features.rst", "utils/calculate_user_level_features.rst", "utils/check_embeddings.rst", "utils/gini_coefficient.rst", "utils/index.rst", "utils/preload_word_lists.rst", "utils/preprocess.rst", "utils/summarize_features.rst", "utils/zscore_chats_and_conversation.rst"], "indexentries": {"adverb_limiter() (in module 
features.politeness_v2_helper)": [[19, "features.politeness_v2_helper.adverb_limiter", false]], "assert_key_columns_present() (in module utils.preprocess)": [[71, "utils.preprocess.assert_key_columns_present", false]], "assign_chunk_nums() (in module utils.assign_chunk_nums)": [[63, "utils.assign_chunk_nums.assign_chunk_nums", false]], "bare_command() (in module features.politeness_v2_helper)": [[19, "features.politeness_v2_helper.bare_command", false]], "built_spacy_ner() (in module features.named_entity_recognition_features)": [[15, "features.named_entity_recognition_features.built_spacy_ner", false]], "burstiness() (in module features.burstiness)": [[4, "features.burstiness.burstiness", false]], "calculate_chat_level_features() (utils.calculate_chat_level_features.chatlevelfeaturescalculator method)": [[64, "utils.calculate_chat_level_features.ChatLevelFeaturesCalculator.calculate_chat_level_features", false]], "calculate_conversation_level_features() (utils.calculate_conversation_level_features.conversationlevelfeaturescalculator method)": [[65, "utils.calculate_conversation_level_features.ConversationLevelFeaturesCalculator.calculate_conversation_level_features", false]], "calculate_hedge_features() (utils.calculate_chat_level_features.chatlevelfeaturescalculator method)": [[64, "utils.calculate_chat_level_features.ChatLevelFeaturesCalculator.calculate_hedge_features", false]], "calculate_id_score() (in module features.information_diversity)": [[13, "features.information_diversity.calculate_ID_score", false]], "calculate_info_diversity() (utils.calculate_conversation_level_features.conversationlevelfeaturescalculator method)": [[65, "utils.calculate_conversation_level_features.ConversationLevelFeaturesCalculator.calculate_info_diversity", false]], "calculate_named_entities() (in module features.named_entity_recognition_features)": [[15, "features.named_entity_recognition_features.calculate_named_entities", false]], "calculate_num_question_naive() (in module 
features.question_num)": [[20, "features.question_num.calculate_num_question_naive", false]], "calculate_politeness_sentiment() (utils.calculate_chat_level_features.chatlevelfeaturescalculator method)": [[64, "utils.calculate_chat_level_features.ChatLevelFeaturesCalculator.calculate_politeness_sentiment", false]], "calculate_politeness_v2() (utils.calculate_chat_level_features.chatlevelfeaturescalculator method)": [[64, "utils.calculate_chat_level_features.ChatLevelFeaturesCalculator.calculate_politeness_v2", false]], "calculate_team_burstiness() (utils.calculate_conversation_level_features.conversationlevelfeaturescalculator method)": [[65, "utils.calculate_conversation_level_features.ConversationLevelFeaturesCalculator.calculate_team_burstiness", false]], "calculate_textblob_sentiment() (utils.calculate_chat_level_features.chatlevelfeaturescalculator method)": [[64, "utils.calculate_chat_level_features.ChatLevelFeaturesCalculator.calculate_textblob_sentiment", false]], "calculate_user_level_features() (utils.calculate_user_level_features.userlevelfeaturescalculator method)": [[66, "utils.calculate_user_level_features.UserLevelFeaturesCalculator.calculate_user_level_features", false]], "calculate_vector_word_mimicry() (utils.calculate_chat_level_features.chatlevelfeaturescalculator method)": [[64, "utils.calculate_chat_level_features.ChatLevelFeaturesCalculator.calculate_vector_word_mimicry", false]], "calculate_word_mimicry() (utils.calculate_chat_level_features.chatlevelfeaturescalculator method)": [[64, "utils.calculate_chat_level_features.ChatLevelFeaturesCalculator.calculate_word_mimicry", false]], "chat_level_features() (feature_builder.featurebuilder method)": [[2, "feature_builder.FeatureBuilder.chat_level_features", false]], "chatlevelfeaturescalculator (class in utils.calculate_chat_level_features)": [[64, "utils.calculate_chat_level_features.ChatLevelFeaturesCalculator", false]], "check_embeddings() (in module utils.check_embeddings)": [[67, 
"utils.check_embeddings.check_embeddings", false]], "classify_ntri() (in module features.other_lexical_features)": [[16, "features.other_lexical_features.classify_NTRI", false]], "classify_text_dalechall() (in module features.readability)": [[21, "features.readability.classify_text_dalechall", false]], "clean_text() (in module features.politeness_v2_helper)": [[19, "features.politeness_v2_helper.clean_text", false]], "coerce_to_date_or_number() (in module features.temporal_features)": [[23, "features.temporal_features.coerce_to_date_or_number", false]], "commit_data() (in module features.politeness_v2_helper)": [[19, "features.politeness_v2_helper.commit_data", false]], "compress() (in module utils.preprocess)": [[71, "utils.preprocess.compress", false]], "compute_frequency() (in module features.word_mimicry)": [[28, "features.word_mimicry.compute_frequency", false]], "compute_frequency_per_conv() (in module features.word_mimicry)": [[28, "features.word_mimicry.compute_frequency_per_conv", false]], "computetf() (in module features.word_mimicry)": [[28, "features.word_mimicry.computeTF", false]], "concat_bert_features() (utils.calculate_chat_level_features.chatlevelfeaturescalculator method)": [[64, "utils.calculate_chat_level_features.ChatLevelFeaturesCalculator.concat_bert_features", false]], "conjection_seperator() (in module features.politeness_v2_helper)": [[19, "features.politeness_v2_helper.conjection_seperator", false]], "content_mimicry_score() (in module features.word_mimicry)": [[28, "features.word_mimicry.Content_mimicry_score", false]], "content_mimicry_score_per_conv() (in module features.word_mimicry)": [[28, "features.word_mimicry.Content_mimicry_score_per_conv", false]], "conv_level_features() (feature_builder.featurebuilder method)": [[2, "feature_builder.FeatureBuilder.conv_level_features", false]], "conv_to_float_arr() (in module features.get_all_dd_features)": [[8, "features.get_all_DD_features.conv_to_float_arr", false]], 
"conversationlevelfeaturescalculator (class in utils.calculate_conversation_level_features)": [[65, "utils.calculate_conversation_level_features.ConversationLevelFeaturesCalculator", false]], "count_all_caps() (in module features.reddit_tags)": [[22, "features.reddit_tags.count_all_caps", false]], "count_bullet_points() (in module features.reddit_tags)": [[22, "features.reddit_tags.count_bullet_points", false]], "count_characters() (in module features.basic_features)": [[3, "features.basic_features.count_characters", false]], "count_difficult_words() (in module features.readability)": [[21, "features.readability.count_difficult_words", false]], "count_ellipses() (in module features.reddit_tags)": [[22, "features.reddit_tags.count_ellipses", false]], "count_emojis() (in module features.reddit_tags)": [[22, "features.reddit_tags.count_emojis", false]], "count_emphasis() (in module features.reddit_tags)": [[22, "features.reddit_tags.count_emphasis", false]], "count_line_breaks() (in module features.reddit_tags)": [[22, "features.reddit_tags.count_line_breaks", false]], "count_links() (in module features.reddit_tags)": [[22, "features.reddit_tags.count_links", false]], "count_matches() (in module features.politeness_v2_helper)": [[19, "features.politeness_v2_helper.count_matches", false]], "count_messages() (in module features.basic_features)": [[3, "features.basic_features.count_messages", false]], "count_numbering() (in module features.reddit_tags)": [[22, "features.reddit_tags.count_numbering", false]], "count_parentheses() (in module features.reddit_tags)": [[22, "features.reddit_tags.count_parentheses", false]], "count_quotes() (in module features.reddit_tags)": [[22, "features.reddit_tags.count_quotes", false]], "count_responding_to_someone() (in module features.reddit_tags)": [[22, "features.reddit_tags.count_responding_to_someone", false]], "count_spacy_matches() (in module features.politeness_v2_helper)": [[19, 
"features.politeness_v2_helper.count_spacy_matches", false]], "count_syllables() (in module features.readability)": [[21, "features.readability.count_syllables", false]], "count_turn_taking_index() (in module features.turn_taking_features)": [[25, "features.turn_taking_features.count_turn_taking_index", false]], "count_turns() (in module features.turn_taking_features)": [[25, "features.turn_taking_features.count_turns", false]], "count_user_references() (in module features.reddit_tags)": [[22, "features.reddit_tags.count_user_references", false]], "count_words() (in module features.basic_features)": [[3, "features.basic_features.count_words", false]], "create_chunks() (in module utils.assign_chunk_nums)": [[63, "utils.assign_chunk_nums.create_chunks", false]], "create_chunks_messages() (in module utils.assign_chunk_nums)": [[63, "utils.assign_chunk_nums.create_chunks_messages", false]], "create_cumulative_rows() (in module utils.preprocess)": [[71, "utils.preprocess.create_cumulative_rows", false]], "dale_chall_helper() (in module features.readability)": [[21, "features.readability.dale_chall_helper", false]], "feat_counts() (in module features.politeness_v2_helper)": [[19, "features.politeness_v2_helper.feat_counts", false]], "feature_builder": [[2, "module-feature_builder", false]], "featurebuilder (class in feature_builder)": [[2, "feature_builder.FeatureBuilder", false]], "features.basic_features": [[3, "module-features.basic_features", false]], "features.burstiness": [[4, "module-features.burstiness", false]], "features.certainty": [[5, "module-features.certainty", false]], "features.discursive_diversity": [[6, "module-features.discursive_diversity", false]], "features.fflow": [[7, "module-features.fflow", false]], "features.get_all_dd_features": [[8, "module-features.get_all_DD_features", false]], "features.get_user_network": [[9, "module-features.get_user_network", false]], "features.hedge": [[10, "module-features.hedge", false]], 
"features.info_exchange_zscore": [[12, "module-features.info_exchange_zscore", false]], "features.information_diversity": [[13, "module-features.information_diversity", false]], "features.lexical_features_v2": [[14, "module-features.lexical_features_v2", false]], "features.named_entity_recognition_features": [[15, "module-features.named_entity_recognition_features", false]], "features.other_lexical_features": [[16, "module-features.other_lexical_features", false]], "features.politeness_features": [[17, "module-features.politeness_features", false]], "features.politeness_v2": [[18, "module-features.politeness_v2", false]], "features.politeness_v2_helper": [[19, "module-features.politeness_v2_helper", false]], "features.question_num": [[20, "module-features.question_num", false]], "features.readability": [[21, "module-features.readability", false]], "features.reddit_tags": [[22, "module-features.reddit_tags", false]], "features.temporal_features": [[23, "module-features.temporal_features", false]], "features.textblob_sentiment_analysis": [[24, "module-features.textblob_sentiment_analysis", false]], "features.turn_taking_features": [[25, "module-features.turn_taking_features", false]], "features.variance_in_dd": [[26, "module-features.variance_in_DD", false]], "features.within_person_discursive_range": [[27, "module-features.within_person_discursive_range", false]], "features.word_mimicry": [[28, "module-features.word_mimicry", false]], "featurize() (feature_builder.featurebuilder method)": [[2, "feature_builder.FeatureBuilder.featurize", false]], "fix_abbreviations() (in module utils.check_embeddings)": [[67, "utils.check_embeddings.fix_abbreviations", false]], "function_mimicry_score() (in module features.word_mimicry)": [[28, "features.word_mimicry.function_mimicry_score", false]], "generate_bert() (in module utils.check_embeddings)": [[67, "utils.check_embeddings.generate_bert", false]], "generate_certainty_pkl() (in module utils.check_embeddings)": [[67, 
"utils.check_embeddings.generate_certainty_pkl", false]], "generate_lexicon_pkl() (in module utils.check_embeddings)": [[67, "utils.check_embeddings.generate_lexicon_pkl", false]], "generate_vect() (in module utils.check_embeddings)": [[67, "utils.check_embeddings.generate_vect", false]], "get_centroids() (utils.calculate_user_level_features.userlevelfeaturescalculator method)": [[66, "utils.calculate_user_level_features.UserLevelFeaturesCalculator.get_centroids", false]], "get_certainty() (in module features.certainty)": [[5, "features.certainty.get_certainty", false]], "get_certainty_score() (utils.calculate_chat_level_features.chatlevelfeaturescalculator method)": [[64, "utils.calculate_chat_level_features.ChatLevelFeaturesCalculator.get_certainty_score", false]], "get_content_words_in_message() (in module features.word_mimicry)": [[28, "features.word_mimicry.get_content_words_in_message", false]], "get_conversation_level_aggregates() (utils.calculate_conversation_level_features.conversationlevelfeaturescalculator method)": [[65, "utils.calculate_conversation_level_features.ConversationLevelFeaturesCalculator.get_conversation_level_aggregates", false]], "get_cosine_similarity() (in module features.discursive_diversity)": [[6, "features.discursive_diversity.get_cosine_similarity", false]], "get_dale_chall_easy_words() (in module utils.preload_word_lists)": [[70, "utils.preload_word_lists.get_dale_chall_easy_words", false]], "get_dale_chall_score_and_classfication() (utils.calculate_chat_level_features.chatlevelfeaturescalculator method)": [[64, "utils.calculate_chat_level_features.ChatLevelFeaturesCalculator.get_dale_chall_score_and_classfication", false]], "get_dd() (in module features.discursive_diversity)": [[6, "features.discursive_diversity.get_DD", false]], "get_dd_features() (in module features.get_all_dd_features)": [[8, "features.get_all_DD_features.get_DD_features", false]], "get_dep_pairs() (in module features.politeness_v2_helper)": [[19, 
"features.politeness_v2_helper.get_dep_pairs", false]], "get_dep_pairs_noneg() (in module features.politeness_v2_helper)": [[19, "features.politeness_v2_helper.get_dep_pairs_noneg", false]], "get_discursive_diversity_features() (utils.calculate_conversation_level_features.conversationlevelfeaturescalculator method)": [[65, "utils.calculate_conversation_level_features.ConversationLevelFeaturesCalculator.get_discursive_diversity_features", false]], "get_first_pct_of_chat() (feature_builder.featurebuilder method)": [[2, "feature_builder.FeatureBuilder.get_first_pct_of_chat", false]], "get_first_person_words() (in module utils.preload_word_lists)": [[70, "utils.preload_word_lists.get_first_person_words", false]], "get_forward_flow() (in module features.fflow)": [[7, "features.fflow.get_forward_flow", false]], "get_forward_flow() (utils.calculate_chat_level_features.chatlevelfeaturescalculator method)": [[64, "utils.calculate_chat_level_features.ChatLevelFeaturesCalculator.get_forward_flow", false]], "get_function_words() (in module utils.preload_word_lists)": [[70, "utils.preload_word_lists.get_function_words", false]], "get_function_words_in_message() (in module features.word_mimicry)": [[28, "features.word_mimicry.get_function_words_in_message", false]], "get_gini() (in module utils.gini_coefficient)": [[68, "utils.gini_coefficient.get_gini", false]], "get_gini_features() (utils.calculate_conversation_level_features.conversationlevelfeaturescalculator method)": [[65, "utils.calculate_conversation_level_features.ConversationLevelFeaturesCalculator.get_gini_features", false]], "get_info_diversity() (in module features.information_diversity)": [[13, "features.information_diversity.get_info_diversity", false]], "get_info_exchange_wordcount() (in module features.info_exchange_zscore)": [[12, "features.info_exchange_zscore.get_info_exchange_wordcount", false]], "get_liwc_count() (in module features.lexical_features_v2)": [[14, "features.lexical_features_v2.get_liwc_count", 
false]], "get_max() (in module utils.summarize_features)": [[72, "utils.summarize_features.get_max", false]], "get_mean() (in module utils.summarize_features)": [[72, "utils.summarize_features.get_mean", false]], "get_median() (in module utils.summarize_features)": [[72, "utils.summarize_features.get_median", false]], "get_mimicry_bert() (in module features.word_mimicry)": [[28, "features.word_mimicry.get_mimicry_bert", false]], "get_min() (in module utils.summarize_features)": [[72, "utils.summarize_features.get_min", false]], "get_moving_mimicry() (in module features.word_mimicry)": [[28, "features.word_mimicry.get_moving_mimicry", false]], "get_named_entity() (utils.calculate_chat_level_features.chatlevelfeaturescalculator method)": [[64, "utils.calculate_chat_level_features.ChatLevelFeaturesCalculator.get_named_entity", false]], "get_nan_vector() (in module features.within_person_discursive_range)": [[27, "features.within_person_discursive_range.get_nan_vector", false]], "get_nan_vector() (in module utils.check_embeddings)": [[67, "utils.check_embeddings.get_nan_vector", false]], "get_polarity_score() (in module features.textblob_sentiment_analysis)": [[24, "features.textblob_sentiment_analysis.get_polarity_score", false]], "get_politeness_strategies() (in module features.politeness_features)": [[17, "features.politeness_features.get_politeness_strategies", false]], "get_politeness_v2() (in module features.politeness_v2)": [[18, "features.politeness_v2.get_politeness_v2", false]], "get_proportion_first_pronouns() (in module features.other_lexical_features)": [[16, "features.other_lexical_features.get_proportion_first_pronouns", false]], "get_question_words() (in module utils.preload_word_lists)": [[70, "utils.preload_word_lists.get_question_words", false]], "get_reddit_features() (utils.calculate_chat_level_features.chatlevelfeaturescalculator method)": [[64, "utils.calculate_chat_level_features.ChatLevelFeaturesCalculator.get_reddit_features", false]], 
"get_sentiment() (in module utils.check_embeddings)": [[67, "utils.check_embeddings.get_sentiment", false]], "get_stdev() (in module utils.summarize_features)": [[72, "utils.summarize_features.get_stdev", false]], "get_subjectivity_score() (in module features.textblob_sentiment_analysis)": [[24, "features.textblob_sentiment_analysis.get_subjectivity_score", false]], "get_sum() (in module utils.summarize_features)": [[72, "utils.summarize_features.get_sum", false]], "get_team_burstiness() (in module features.burstiness)": [[4, "features.burstiness.get_team_burstiness", false]], "get_temporal_features() (utils.calculate_chat_level_features.chatlevelfeaturescalculator method)": [[64, "utils.calculate_chat_level_features.ChatLevelFeaturesCalculator.get_temporal_features", false]], "get_time_diff() (in module features.temporal_features)": [[23, "features.temporal_features.get_time_diff", false]], "get_time_diff_startend() (in module features.temporal_features)": [[23, "features.temporal_features.get_time_diff_startend", false]], "get_turn() (in module features.turn_taking_features)": [[25, "features.turn_taking_features.get_turn", false]], "get_turn_id() (in module utils.preprocess)": [[71, "utils.preprocess.get_turn_id", false]], "get_turn_taking_features() (utils.calculate_conversation_level_features.conversationlevelfeaturescalculator method)": [[65, "utils.calculate_conversation_level_features.ConversationLevelFeaturesCalculator.get_turn_taking_features", false]], "get_unique_pairwise_combos() (in module features.discursive_diversity)": [[6, "features.discursive_diversity.get_unique_pairwise_combos", false]], "get_user_level_aggregates() (utils.calculate_conversation_level_features.conversationlevelfeaturescalculator method)": [[65, "utils.calculate_conversation_level_features.ConversationLevelFeaturesCalculator.get_user_level_aggregates", false]], "get_user_level_summary_statistics_features() (utils.calculate_user_level_features.userlevelfeaturescalculator 
method)": [[66, "utils.calculate_user_level_features.UserLevelFeaturesCalculator.get_user_level_summary_statistics_features", false]], "get_user_level_summed_features() (utils.calculate_user_level_features.userlevelfeaturescalculator method)": [[66, "utils.calculate_user_level_features.UserLevelFeaturesCalculator.get_user_level_summed_features", false]], "get_user_max_dataframe() (in module utils.summarize_features)": [[72, "utils.summarize_features.get_user_max_dataframe", false]], "get_user_mean_dataframe() (in module utils.summarize_features)": [[72, "utils.summarize_features.get_user_mean_dataframe", false]], "get_user_median_dataframe() (in module utils.summarize_features)": [[72, "utils.summarize_features.get_user_median_dataframe", false]], "get_user_min_dataframe() (in module utils.summarize_features)": [[72, "utils.summarize_features.get_user_min_dataframe", false]], "get_user_network() (in module features.get_user_network)": [[9, "features.get_user_network.get_user_network", false]], "get_user_network() (utils.calculate_user_level_features.userlevelfeaturescalculator method)": [[66, "utils.calculate_user_level_features.UserLevelFeaturesCalculator.get_user_network", false]], "get_user_stdev_dataframe() (in module utils.summarize_features)": [[72, "utils.summarize_features.get_user_stdev_dataframe", false]], "get_user_sum_dataframe() (in module utils.summarize_features)": [[72, "utils.summarize_features.get_user_sum_dataframe", false]], "get_variance_in_dd() (in module features.variance_in_dd)": [[26, "features.variance_in_DD.get_variance_in_DD", false]], "get_within_person_disc_range() (in module features.within_person_discursive_range)": [[27, "features.within_person_discursive_range.get_within_person_disc_range", false]], "get_word_ttr() (in module features.other_lexical_features)": [[16, "features.other_lexical_features.get_word_TTR", false]], "get_zscore_across_all_chats() (in module utils.zscore_chats_and_conversation)": [[73, 
"utils.zscore_chats_and_conversation.get_zscore_across_all_chats", false]], "get_zscore_across_all_conversations() (in module utils.zscore_chats_and_conversation)": [[73, "utils.zscore_chats_and_conversation.get_zscore_across_all_conversations", false]], "gini_coefficient() (in module utils.gini_coefficient)": [[68, "utils.gini_coefficient.gini_coefficient", false]], "info_diversity() (in module features.information_diversity)": [[13, "features.information_diversity.info_diversity", false]], "info_exchange() (utils.calculate_chat_level_features.chatlevelfeaturescalculator method)": [[64, "utils.calculate_chat_level_features.ChatLevelFeaturesCalculator.info_exchange", false]], "is_hedged_sentence_1() (in module features.hedge)": [[10, "features.hedge.is_hedged_sentence_1", false]], "is_valid_term() (in module utils.check_embeddings)": [[67, "utils.check_embeddings.is_valid_term", false]], "lexical_features() (utils.calculate_chat_level_features.chatlevelfeaturescalculator method)": [[64, "utils.calculate_chat_level_features.ChatLevelFeaturesCalculator.lexical_features", false]], "liwc_features() (in module features.lexical_features_v2)": [[14, "features.lexical_features_v2.liwc_features", false]], "load_custem_liwc_dict() (feature_builder.featurebuilder method)": [[2, "feature_builder.FeatureBuilder.load_custem_liwc_dict", false]], "load_liwc_dict() (in module utils.check_embeddings)": [[67, "utils.check_embeddings.load_liwc_dict", false]], "load_saved_data() (in module features.politeness_v2_helper)": [[19, "features.politeness_v2_helper.load_saved_data", false]], "load_to_dict() (in module features.politeness_v2_helper)": [[19, "features.politeness_v2_helper.load_to_dict", false]], "load_to_lists() (in module features.politeness_v2_helper)": [[19, "features.politeness_v2_helper.load_to_lists", false]], "merge_conv_data_with_original() (feature_builder.featurebuilder method)": [[2, "feature_builder.FeatureBuilder.merge_conv_data_with_original", false]], 
"mimic_words() (in module features.word_mimicry)": [[28, "features.word_mimicry.mimic_words", false]], "module": [[2, "module-feature_builder", false], [3, "module-features.basic_features", false], [4, "module-features.burstiness", false], [5, "module-features.certainty", false], [6, "module-features.discursive_diversity", false], [7, "module-features.fflow", false], [8, "module-features.get_all_DD_features", false], [9, "module-features.get_user_network", false], [10, "module-features.hedge", false], [12, "module-features.info_exchange_zscore", false], [13, "module-features.information_diversity", false], [14, "module-features.lexical_features_v2", false], [15, "module-features.named_entity_recognition_features", false], [16, "module-features.other_lexical_features", false], [17, "module-features.politeness_features", false], [18, "module-features.politeness_v2", false], [19, "module-features.politeness_v2_helper", false], [20, "module-features.question_num", false], [21, "module-features.readability", false], [22, "module-features.reddit_tags", false], [23, "module-features.temporal_features", false], [24, "module-features.textblob_sentiment_analysis", false], [25, "module-features.turn_taking_features", false], [26, "module-features.variance_in_DD", false], [27, "module-features.within_person_discursive_range", false], [28, "module-features.word_mimicry", false], [63, "module-utils.assign_chunk_nums", false], [64, "module-utils.calculate_chat_level_features", false], [65, "module-utils.calculate_conversation_level_features", false], [66, "module-utils.calculate_user_level_features", false], [67, "module-utils.check_embeddings", false], [68, "module-utils.gini_coefficient", false], [70, "module-utils.preload_word_lists", false], [71, "module-utils.preprocess", false], [72, "module-utils.summarize_features", false], [73, "module-utils.zscore_chats_and_conversation", false]], "named_entities() (in module features.named_entity_recognition_features)": [[15, 
"features.named_entity_recognition_features.named_entities", false]], "num_named_entity() (in module features.named_entity_recognition_features)": [[15, "features.named_entity_recognition_features.num_named_entity", false]], "other_lexical_features() (utils.calculate_chat_level_features.chatlevelfeaturescalculator method)": [[64, "utils.calculate_chat_level_features.ChatLevelFeaturesCalculator.other_lexical_features", false]], "phrase_split() (in module features.politeness_v2_helper)": [[19, "features.politeness_v2_helper.phrase_split", false]], "positivity_zscore() (utils.calculate_chat_level_features.chatlevelfeaturescalculator method)": [[64, "utils.calculate_chat_level_features.ChatLevelFeaturesCalculator.positivity_zscore", false]], "prep_simple() (in module features.politeness_v2_helper)": [[19, "features.politeness_v2_helper.prep_simple", false]], "prep_whole() (in module features.politeness_v2_helper)": [[19, "features.politeness_v2_helper.prep_whole", false]], "preprocess_chat_data() (feature_builder.featurebuilder method)": [[2, "feature_builder.FeatureBuilder.preprocess_chat_data", false]], "preprocess_conversation_columns() (in module utils.preprocess)": [[71, "utils.preprocess.preprocess_conversation_columns", false]], "preprocess_naive_turns() (in module utils.preprocess)": [[71, "utils.preprocess.preprocess_naive_turns", false]], "preprocess_text() (in module utils.preprocess)": [[71, "utils.preprocess.preprocess_text", false]], "preprocess_text_lowercase_but_retain_punctuation() (in module utils.preprocess)": [[71, "utils.preprocess.preprocess_text_lowercase_but_retain_punctuation", false]], "preprocessing() (in module features.information_diversity)": [[13, "features.information_diversity.preprocessing", false]], "punctuation_seperator() (in module features.politeness_v2_helper)": [[19, "features.politeness_v2_helper.punctuation_seperator", false]], "question() (in module features.politeness_v2_helper)": [[19, 
"features.politeness_v2_helper.Question", false]], "read_in_lexicons() (in module utils.check_embeddings)": [[67, "utils.check_embeddings.read_in_lexicons", false]], "reduce_chunks() (in module utils.assign_chunk_nums)": [[63, "utils.assign_chunk_nums.reduce_chunks", false]], "remove_active_user() (in module features.get_user_network)": [[9, "features.get_user_network.remove_active_user", false]], "remove_unhashable_cols() (in module utils.preprocess)": [[71, "utils.preprocess.remove_unhashable_cols", false]], "save_features() (feature_builder.featurebuilder method)": [[2, "feature_builder.FeatureBuilder.save_features", false]], "sentence_pad() (in module features.politeness_v2_helper)": [[19, "features.politeness_v2_helper.sentence_pad", false]], "sentence_split() (in module features.politeness_v2_helper)": [[19, "features.politeness_v2_helper.sentence_split", false]], "sentenciser() (in module features.politeness_v2_helper)": [[19, "features.politeness_v2_helper.sentenciser", false]], "set_self_conv_data() (feature_builder.featurebuilder method)": [[2, "feature_builder.FeatureBuilder.set_self_conv_data", false]], "sort_words() (in module utils.check_embeddings)": [[67, "utils.check_embeddings.sort_words", false]], "str_to_vec() (in module utils.check_embeddings)": [[67, "utils.check_embeddings.str_to_vec", false]], "text_based_features() (utils.calculate_chat_level_features.chatlevelfeaturescalculator method)": [[64, "utils.calculate_chat_level_features.ChatLevelFeaturesCalculator.text_based_features", false]], "token_count() (in module features.politeness_v2_helper)": [[19, "features.politeness_v2_helper.token_count", false]], "train_spacy_ner() (in module features.named_entity_recognition_features)": [[15, "features.named_entity_recognition_features.train_spacy_ner", false]], "user_level_features() (feature_builder.featurebuilder method)": [[2, "feature_builder.FeatureBuilder.user_level_features", false]], "userlevelfeaturescalculator (class in 
utils.calculate_user_level_features)": [[66, "utils.calculate_user_level_features.UserLevelFeaturesCalculator", false]], "utils.assign_chunk_nums": [[63, "module-utils.assign_chunk_nums", false]], "utils.calculate_chat_level_features": [[64, "module-utils.calculate_chat_level_features", false]], "utils.calculate_conversation_level_features": [[65, "module-utils.calculate_conversation_level_features", false]], "utils.calculate_user_level_features": [[66, "module-utils.calculate_user_level_features", false]], "utils.check_embeddings": [[67, "module-utils.check_embeddings", false]], "utils.gini_coefficient": [[68, "module-utils.gini_coefficient", false]], "utils.preload_word_lists": [[70, "module-utils.preload_word_lists", false]], "utils.preprocess": [[71, "module-utils.preprocess", false]], "utils.summarize_features": [[72, "module-utils.summarize_features", false]], "utils.zscore_chats_and_conversation": [[73, "module-utils.zscore_chats_and_conversation", false]], "word_start() (in module features.politeness_v2_helper)": [[19, "features.politeness_v2_helper.word_start", false]]}, "objects": {"": [[2, 0, 0, "-", "feature_builder"]], "feature_builder": [[2, 1, 1, "", "FeatureBuilder"]], "feature_builder.FeatureBuilder": [[2, 2, 1, "", "chat_level_features"], [2, 2, 1, "", "conv_level_features"], [2, 2, 1, "", "featurize"], [2, 2, 1, "", "get_first_pct_of_chat"], [2, 2, 1, "", "load_custem_liwc_dict"], [2, 2, 1, "", "merge_conv_data_with_original"], [2, 2, 1, "", "preprocess_chat_data"], [2, 2, 1, "", "save_features"], [2, 2, 1, "", "set_self_conv_data"], [2, 2, 1, "", "user_level_features"]], "features": [[3, 0, 0, "-", "basic_features"], [4, 0, 0, "-", "burstiness"], [5, 0, 0, "-", "certainty"], [6, 0, 0, "-", "discursive_diversity"], [7, 0, 0, "-", "fflow"], [8, 0, 0, "-", "get_all_DD_features"], [9, 0, 0, "-", "get_user_network"], [10, 0, 0, "-", "hedge"], [12, 0, 0, "-", "info_exchange_zscore"], [13, 0, 0, "-", "information_diversity"], [14, 0, 0, "-", 
"lexical_features_v2"], [15, 0, 0, "-", "named_entity_recognition_features"], [16, 0, 0, "-", "other_lexical_features"], [17, 0, 0, "-", "politeness_features"], [18, 0, 0, "-", "politeness_v2"], [19, 0, 0, "-", "politeness_v2_helper"], [20, 0, 0, "-", "question_num"], [21, 0, 0, "-", "readability"], [22, 0, 0, "-", "reddit_tags"], [23, 0, 0, "-", "temporal_features"], [24, 0, 0, "-", "textblob_sentiment_analysis"], [25, 0, 0, "-", "turn_taking_features"], [26, 0, 0, "-", "variance_in_DD"], [27, 0, 0, "-", "within_person_discursive_range"], [28, 0, 0, "-", "word_mimicry"]], "features.basic_features": [[3, 3, 1, "", "count_characters"], [3, 3, 1, "", "count_messages"], [3, 3, 1, "", "count_words"]], "features.burstiness": [[4, 3, 1, "", "burstiness"], [4, 3, 1, "", "get_team_burstiness"]], "features.certainty": [[5, 3, 1, "", "get_certainty"]], "features.discursive_diversity": [[6, 3, 1, "", "get_DD"], [6, 3, 1, "", "get_cosine_similarity"], [6, 3, 1, "", "get_unique_pairwise_combos"]], "features.fflow": [[7, 3, 1, "", "get_forward_flow"]], "features.get_all_DD_features": [[8, 3, 1, "", "conv_to_float_arr"], [8, 3, 1, "", "get_DD_features"]], "features.get_user_network": [[9, 3, 1, "", "get_user_network"], [9, 3, 1, "", "remove_active_user"]], "features.hedge": [[10, 3, 1, "", "is_hedged_sentence_1"]], "features.info_exchange_zscore": [[12, 3, 1, "", "get_info_exchange_wordcount"]], "features.information_diversity": [[13, 3, 1, "", "calculate_ID_score"], [13, 3, 1, "", "get_info_diversity"], [13, 3, 1, "", "info_diversity"], [13, 3, 1, "", "preprocessing"]], "features.lexical_features_v2": [[14, 3, 1, "", "get_liwc_count"], [14, 3, 1, "", "liwc_features"]], "features.named_entity_recognition_features": [[15, 3, 1, "", "built_spacy_ner"], [15, 3, 1, "", "calculate_named_entities"], [15, 3, 1, "", "named_entities"], [15, 3, 1, "", "num_named_entity"], [15, 3, 1, "", "train_spacy_ner"]], "features.other_lexical_features": [[16, 3, 1, "", "classify_NTRI"], [16, 3, 1, "", 
"get_proportion_first_pronouns"], [16, 3, 1, "", "get_word_TTR"]], "features.politeness_features": [[17, 3, 1, "", "get_politeness_strategies"]], "features.politeness_v2": [[18, 3, 1, "", "get_politeness_v2"]], "features.politeness_v2_helper": [[19, 3, 1, "", "Question"], [19, 3, 1, "", "adverb_limiter"], [19, 3, 1, "", "bare_command"], [19, 3, 1, "", "clean_text"], [19, 3, 1, "", "commit_data"], [19, 3, 1, "", "conjection_seperator"], [19, 3, 1, "", "count_matches"], [19, 3, 1, "", "count_spacy_matches"], [19, 3, 1, "", "feat_counts"], [19, 3, 1, "", "get_dep_pairs"], [19, 3, 1, "", "get_dep_pairs_noneg"], [19, 3, 1, "", "load_saved_data"], [19, 3, 1, "", "load_to_dict"], [19, 3, 1, "", "load_to_lists"], [19, 3, 1, "", "phrase_split"], [19, 3, 1, "", "prep_simple"], [19, 3, 1, "", "prep_whole"], [19, 3, 1, "", "punctuation_seperator"], [19, 3, 1, "", "sentence_pad"], [19, 3, 1, "", "sentence_split"], [19, 3, 1, "", "sentenciser"], [19, 3, 1, "", "token_count"], [19, 3, 1, "", "word_start"]], "features.question_num": [[20, 3, 1, "", "calculate_num_question_naive"]], "features.readability": [[21, 3, 1, "", "classify_text_dalechall"], [21, 3, 1, "", "count_difficult_words"], [21, 3, 1, "", "count_syllables"], [21, 3, 1, "", "dale_chall_helper"]], "features.reddit_tags": [[22, 3, 1, "", "count_all_caps"], [22, 3, 1, "", "count_bullet_points"], [22, 3, 1, "", "count_ellipses"], [22, 3, 1, "", "count_emojis"], [22, 3, 1, "", "count_emphasis"], [22, 3, 1, "", "count_line_breaks"], [22, 3, 1, "", "count_links"], [22, 3, 1, "", "count_numbering"], [22, 3, 1, "", "count_parentheses"], [22, 3, 1, "", "count_quotes"], [22, 3, 1, "", "count_responding_to_someone"], [22, 3, 1, "", "count_user_references"]], "features.temporal_features": [[23, 3, 1, "", "coerce_to_date_or_number"], [23, 3, 1, "", "get_time_diff"], [23, 3, 1, "", "get_time_diff_startend"]], "features.textblob_sentiment_analysis": [[24, 3, 1, "", "get_polarity_score"], [24, 3, 1, "", "get_subjectivity_score"]], 
"features.turn_taking_features": [[25, 3, 1, "", "count_turn_taking_index"], [25, 3, 1, "", "count_turns"], [25, 3, 1, "", "get_turn"]], "features.variance_in_DD": [[26, 3, 1, "", "get_variance_in_DD"]], "features.within_person_discursive_range": [[27, 3, 1, "", "get_nan_vector"], [27, 3, 1, "", "get_within_person_disc_range"]], "features.word_mimicry": [[28, 3, 1, "", "Content_mimicry_score"], [28, 3, 1, "", "Content_mimicry_score_per_conv"], [28, 3, 1, "", "computeTF"], [28, 3, 1, "", "compute_frequency"], [28, 3, 1, "", "compute_frequency_per_conv"], [28, 3, 1, "", "function_mimicry_score"], [28, 3, 1, "", "get_content_words_in_message"], [28, 3, 1, "", "get_function_words_in_message"], [28, 3, 1, "", "get_mimicry_bert"], [28, 3, 1, "", "get_moving_mimicry"], [28, 3, 1, "", "mimic_words"]], "utils": [[63, 0, 0, "-", "assign_chunk_nums"], [64, 0, 0, "-", "calculate_chat_level_features"], [65, 0, 0, "-", "calculate_conversation_level_features"], [66, 0, 0, "-", "calculate_user_level_features"], [67, 0, 0, "-", "check_embeddings"], [68, 0, 0, "-", "gini_coefficient"], [70, 0, 0, "-", "preload_word_lists"], [71, 0, 0, "-", "preprocess"], [72, 0, 0, "-", "summarize_features"], [73, 0, 0, "-", "zscore_chats_and_conversation"]], "utils.assign_chunk_nums": [[63, 3, 1, "", "assign_chunk_nums"], [63, 3, 1, "", "create_chunks"], [63, 3, 1, "", "create_chunks_messages"], [63, 3, 1, "", "reduce_chunks"]], "utils.calculate_chat_level_features": [[64, 1, 1, "", "ChatLevelFeaturesCalculator"]], "utils.calculate_chat_level_features.ChatLevelFeaturesCalculator": [[64, 2, 1, "", "calculate_chat_level_features"], [64, 2, 1, "", "calculate_hedge_features"], [64, 2, 1, "", "calculate_politeness_sentiment"], [64, 2, 1, "", "calculate_politeness_v2"], [64, 2, 1, "", "calculate_textblob_sentiment"], [64, 2, 1, "", "calculate_vector_word_mimicry"], [64, 2, 1, "", "calculate_word_mimicry"], [64, 2, 1, "", "concat_bert_features"], [64, 2, 1, "", "get_certainty_score"], [64, 2, 1, "", 
"get_dale_chall_score_and_classfication"], [64, 2, 1, "", "get_forward_flow"], [64, 2, 1, "", "get_named_entity"], [64, 2, 1, "", "get_reddit_features"], [64, 2, 1, "", "get_temporal_features"], [64, 2, 1, "", "info_exchange"], [64, 2, 1, "", "lexical_features"], [64, 2, 1, "", "other_lexical_features"], [64, 2, 1, "", "positivity_zscore"], [64, 2, 1, "", "text_based_features"]], "utils.calculate_conversation_level_features": [[65, 1, 1, "", "ConversationLevelFeaturesCalculator"]], "utils.calculate_conversation_level_features.ConversationLevelFeaturesCalculator": [[65, 2, 1, "", "calculate_conversation_level_features"], [65, 2, 1, "", "calculate_info_diversity"], [65, 2, 1, "", "calculate_team_burstiness"], [65, 2, 1, "", "get_conversation_level_aggregates"], [65, 2, 1, "", "get_discursive_diversity_features"], [65, 2, 1, "", "get_gini_features"], [65, 2, 1, "", "get_turn_taking_features"], [65, 2, 1, "", "get_user_level_aggregates"]], "utils.calculate_user_level_features": [[66, 1, 1, "", "UserLevelFeaturesCalculator"]], "utils.calculate_user_level_features.UserLevelFeaturesCalculator": [[66, 2, 1, "", "calculate_user_level_features"], [66, 2, 1, "", "get_centroids"], [66, 2, 1, "", "get_user_level_summary_statistics_features"], [66, 2, 1, "", "get_user_level_summed_features"], [66, 2, 1, "", "get_user_network"]], "utils.check_embeddings": [[67, 3, 1, "", "check_embeddings"], [67, 3, 1, "", "fix_abbreviations"], [67, 3, 1, "", "generate_bert"], [67, 3, 1, "", "generate_certainty_pkl"], [67, 3, 1, "", "generate_lexicon_pkl"], [67, 3, 1, "", "generate_vect"], [67, 3, 1, "", "get_nan_vector"], [67, 3, 1, "", "get_sentiment"], [67, 3, 1, "", "is_valid_term"], [67, 3, 1, "", "load_liwc_dict"], [67, 3, 1, "", "read_in_lexicons"], [67, 3, 1, "", "sort_words"], [67, 3, 1, "", "str_to_vec"]], "utils.gini_coefficient": [[68, 3, 1, "", "get_gini"], [68, 3, 1, "", "gini_coefficient"]], "utils.preload_word_lists": [[70, 3, 1, "", "get_dale_chall_easy_words"], [70, 3, 1, "", 
"get_first_person_words"], [70, 3, 1, "", "get_function_words"], [70, 3, 1, "", "get_question_words"]], "utils.preprocess": [[71, 3, 1, "", "assert_key_columns_present"], [71, 3, 1, "", "compress"], [71, 3, 1, "", "create_cumulative_rows"], [71, 3, 1, "", "get_turn_id"], [71, 3, 1, "", "preprocess_conversation_columns"], [71, 3, 1, "", "preprocess_naive_turns"], [71, 3, 1, "", "preprocess_text"], [71, 3, 1, "", "preprocess_text_lowercase_but_retain_punctuation"], [71, 3, 1, "", "remove_unhashable_cols"]], "utils.summarize_features": [[72, 3, 1, "", "get_max"], [72, 3, 1, "", "get_mean"], [72, 3, 1, "", "get_median"], [72, 3, 1, "", "get_min"], [72, 3, 1, "", "get_stdev"], [72, 3, 1, "", "get_sum"], [72, 3, 1, "", "get_user_max_dataframe"], [72, 3, 1, "", "get_user_mean_dataframe"], [72, 3, 1, "", "get_user_median_dataframe"], [72, 3, 1, "", "get_user_min_dataframe"], [72, 3, 1, "", "get_user_stdev_dataframe"], [72, 3, 1, "", "get_user_sum_dataframe"]], "utils.zscore_chats_and_conversation": [[73, 3, 1, "", "get_zscore_across_all_chats"], [73, 3, 1, "", "get_zscore_across_all_conversations"]]}, "objnames": {"0": ["py", "module", "Python module"], "1": ["py", "class", "Python class"], "2": ["py", "method", "Python method"], "3": ["py", "function", "Python function"]}, "objtypes": {"0": "py:module", "1": "py:class", "2": "py:method", "3": "py:function"}, "terms": {"": [0, 1, 2, 4, 5, 9, 11, 13, 14, 25, 28, 29, 31, 32, 34, 35, 36, 40, 41, 42, 43, 44, 45, 46, 47, 49, 50, 51, 52, 55, 59, 61, 62, 64, 65, 66], "0": [0, 1, 2, 5, 10, 13, 16, 21, 24, 30, 31, 32, 33, 34, 35, 36, 37, 38, 40, 41, 43, 45, 46, 47, 50, 51, 53, 55, 59, 61], "00222437221134802": [5, 64], "01": 51, "02": 51, "04": 40, "0496": [21, 33], "05": [13, 40, 50, 51], "06": 51, "08": [42, 50], "09": [45, 46, 50], "1": [0, 1, 2, 3, 10, 13, 22, 24, 32, 34, 35, 37, 38, 40, 41, 43, 44, 45, 46, 47, 48, 51, 53, 55, 56, 57, 59, 61, 62, 67], "10": [1, 5, 6, 21, 24, 33, 59, 61, 64], "100": [1, 21, 33, 37, 42, 47, 62], 
"10th": 33, "1145": [21, 24], "1177": [5, 64], "11th": 33, "12": [35, 45, 46, 50], "1287": 6, "12th": 33, "13": 50, "14": 50, "15": [1, 37, 50], "1579": [21, 33], "17": 50, "1948": 33, "195": 36, "1977": 62, "1d": 67, "1lpngokujsx": 5, "1st": 50, "1st_person": 50, "1st_person_pl": 50, "1st_person_start": 50, "2": [0, 1, 2, 34, 35, 41, 47, 59, 61, 62, 67], "20": [37, 59], "2004": 42, "2007": [0, 5, 42, 67], "2009": 60, "2012": 55, "2013": [12, 16, 31, 32, 36, 37, 38, 41, 43, 49, 50, 52, 54, 70], "2015": [42, 53, 58, 60, 67], "2016": 4, "2017": 13, "2018": [40, 44, 55], "2019": [35, 52], "2020": [18, 21, 24, 33, 49, 50, 56, 57], "2021": [1, 6, 43, 44], "2022": [13, 34], "2023": [1, 5, 30, 59, 61, 64], "2024": [40, 42], "21": 59, "22": [41, 50], "2384068": 4, "24": [1, 61], "25": 47, "27": [42, 50], "28": 50, "29": 50, "2nd": 50, "2nd_person": 50, "2nd_person_start": 50, "3": [0, 1, 2, 21, 34, 41, 42, 51, 59, 61, 67, 71], "30": 50, "3000": 33, "32": [34, 50], "3432929": [21, 24], "35": 51, "36": 50, "38": 50, "39": 49, "39512260": 68, "3n": 59, "4": [0, 1, 5, 13, 21, 30, 33, 41, 42, 56, 61, 62, 71], "4274": 6, "43": 50, "45": 50, "47": 50, "49": 50, "4pit4bqz6": 5, "4th": [21, 33], "5": [1, 5, 21, 30, 33, 37, 41, 59], "50": [1, 47], "52": 50, "53": 50, "57": 50, "58": 50, "5th": 33, "6": [1, 33, 43], "60": 51, "63": 50, "6365": 21, "64": 67, "68": 47, "6th": 33, "7": [30, 33, 48], "70": 50, "78": [35, 50], "7th": 33, "8": [1, 30, 33, 42, 67], "80": [21, 70], "82": 41, "85": 34, "86": 35, "87": 50, "89": [45, 46], "8th": 33, "9": [2, 5, 21, 30, 33, 40, 47, 50, 67], "9123": 47, "92": 51, "93chall_readability_formula": [21, 70], "94": 15, "95": 47, "95450": 42, "97": 51, "9855072464": 47, "9992": 47, "99954": 47, "9th": 33, "A": [1, 2, 4, 12, 13, 14, 15, 16, 17, 18, 19, 21, 23, 25, 28, 33, 34, 35, 37, 38, 40, 41, 44, 45, 46, 47, 49, 50, 51, 52, 57, 59, 60, 61, 62, 64, 66, 67, 68, 70, 71, 72, 73], "And": [1, 62], "As": [1, 31, 35, 36, 40, 42, 45, 61], "Be": 1, "But": [1, 
50, 62], "By": [0, 1, 11, 42, 50], "For": [0, 1, 31, 34, 37, 41, 42, 43, 47, 49, 54, 56, 59, 62, 65], "If": [0, 1, 2, 5, 21, 29, 30, 35, 42, 45, 47, 50, 55, 61, 62, 63, 64, 65, 66, 67, 71], "In": [1, 21, 30, 31, 34, 35, 36, 37, 39, 41, 42, 45, 46, 47, 50, 55, 59, 61, 62], "It": [1, 2, 31, 32, 33, 36, 37, 41, 44, 45, 46, 50, 64, 65, 66, 67, 71], "NO": 37, "NOT": [1, 61], "No": [19, 50, 53], "Not": 41, "One": [1, 37, 61], "That": [29, 55], "The": [1, 2, 3, 4, 5, 7, 9, 10, 11, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 28, 30, 31, 32, 33, 34, 35, 36, 37, 38, 40, 41, 42, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 58, 59, 60, 63, 64, 65, 66, 67, 68, 69, 71, 72, 73], "Then": [1, 55, 61], "There": [1, 11, 32, 61, 66], "These": [1, 11, 17, 32, 34, 42, 48, 52, 61, 62, 69], "To": [0, 1, 29, 31, 34, 37, 40, 42, 55, 56, 57, 61, 62], "WITH": 21, "Will": 50, "_deviat": 55, "_lexical_wordcount_custom": 42, "_preprocessed_": 0, "abbrevi": 67, "abil": [13, 29], "abl": [31, 36, 61], "abort": 1, "about": [1, 12, 29, 31, 36, 41, 47, 61, 62], "abov": [1, 21, 34, 61], "abstract_id": 4, "academ": 42, "accept": [0, 1, 58, 61], "access": [0, 1, 11, 15, 61], "accommod": [28, 32, 39, 45, 46, 64], "accord": [21, 37, 59, 64, 70], "accordingli": 63, "account": [1, 29, 32, 42], "accus": 50, "achiev": [50, 62], "acknowledg": 49, "acm": [21, 24], "acommod": 36, "across": [1, 13, 28, 31, 34, 40, 41, 50, 62, 64, 73], "action": 59, "activ": [1, 9, 44, 55, 71], "actual": [41, 56], "ad": [61, 62, 71], "adapt": 59, "add": [0, 1, 2, 21, 51, 61], "addit": [2, 32, 34, 42, 63, 69], "addition": [0, 30, 31, 32, 54], "address": 1, "adjac": 71, "adjust": [0, 21, 37, 63], "advanc": [31, 36], "advantag": 4, "adverb": [19, 31, 36], "adverb_limit": [19, 49], "affect": [0, 1, 29, 35, 44], "affirm": 49, "after": [0, 1, 31, 34, 36, 42, 43, 61, 62, 64, 67], "again": [32, 34, 67], "against": [28, 31, 36, 52, 67], "agarw": 62, "aggreg": [0, 2, 3, 37, 44, 61, 62, 65, 66, 72], "agre": 47, "agreement": 49, "ah": 
[31, 36], "ai": 62, "aim": [39, 62], "airtim": [37, 62], "al": [1, 5, 16, 18, 21, 24, 30, 31, 32, 33, 34, 35, 36, 38, 42, 43, 44, 49, 50, 52, 53, 54, 56, 57, 58, 59, 60, 64], "algorithm": [56, 57], "align": [35, 51], "all": [0, 1, 2, 6, 11, 12, 13, 15, 19, 22, 28, 30, 31, 34, 35, 36, 37, 40, 41, 42, 46, 48, 49, 51, 52, 55, 58, 61, 62, 64, 65, 66, 71, 73], "allow": [0, 1, 67], "almaatouq": 59, "alon": 67, "along": 1, "alongsid": 1, "alphabet": 49, "alphanumer": [42, 67, 71], "alreadi": [0, 1, 2, 4, 10, 12, 16, 67], "also": [0, 1, 2, 28, 30, 31, 32, 34, 36, 37, 38, 42, 47, 51, 54, 60, 61, 62, 64, 65, 67, 69], "alsobai": 59, "altern": 59, "although": [1, 23, 31, 36], "alwai": [1, 55], "am": [31, 36, 42, 54, 62], "amaz": [48, 56], "ambient": 32, "american": 33, "ami": [47, 59, 62], "amic": 62, "among": [36, 37, 52, 55, 62], "amongst": [6, 35, 48], "an": [0, 1, 2, 5, 8, 11, 12, 13, 21, 29, 30, 31, 32, 33, 34, 36, 38, 40, 41, 42, 45, 47, 48, 49, 50, 51, 52, 54, 59, 60, 61, 62, 63, 65, 66, 67, 68, 71], "analys": [1, 62], "analysi": [0, 1, 11, 52, 62, 67, 71], "analyt": 62, "analyz": [0, 2, 13, 14, 16, 17, 19, 20, 21, 22, 24, 28, 43, 52, 62, 67, 71], "analyze_first_pct": [0, 1, 2], "angri": 47, "ani": [0, 1, 29, 31, 33, 38, 54, 62, 71], "annot": [17, 50], "anoth": [30, 34, 36, 48], "answer": 29, "anybodi": [31, 36], "anyth": [1, 23, 31, 36, 56], "anywher": [31, 36], "apartment": 42, "api": [2, 47], "api_refer": 24, "apolog": [17, 50], "apologi": 49, "appear": [0, 15, 28, 31, 37, 38, 42, 64, 67], "append": [1, 17, 42, 64, 65, 66, 67], "appli": [4, 13, 18, 62, 64, 69], "applic": [29, 71], "appreci": 50, "approach": [32, 38, 42, 45, 46, 49, 53, 64], "appropri": [1, 31, 69], "ar": [0, 1, 2, 3, 5, 9, 10, 11, 15, 17, 19, 21, 23, 28, 29, 30, 31, 32, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 45, 46, 47, 48, 49, 51, 54, 55, 56, 57, 58, 59, 61, 62, 63, 64, 65, 66, 67, 69, 71], "arcross": 34, "area": 62, "aren": [31, 36], "around": 2, "arous": 48, "arrai": [6, 8, 67, 68], "articl": 
[37, 50], "ask": [20, 47, 54], "ask_ag": 49, "aspect": [50, 62], "assert_key_columns_pres": 71, "assign": [1, 31, 36, 38, 45, 46, 52, 59, 61, 63, 71], "assign_chunk_num": 69, "associ": [1, 4, 15, 21, 29, 30, 31, 32, 36, 40, 45, 46, 47, 48, 61], "assum": [0, 1, 2, 10, 12, 16, 23, 31, 41, 60, 61, 67, 71], "assumpt": [1, 41, 61], "asterisk": 22, "attribut": [0, 1, 11, 34, 51, 52, 56, 62], "author": [5, 31, 36, 59], "auto": 2, "automat": [0, 1, 61, 69], "auxiliari": [31, 36], "avail": [0, 1, 61, 62, 63, 64, 67], "averag": [1, 11, 13, 28, 30, 33, 34, 35, 40, 41, 46, 52, 64, 65, 72], "avil": 62, "avoid": 30, "awar": 29, "awesom": 62, "b": [4, 34, 35, 42, 45, 46, 55, 62], "bachelor": 42, "back": 62, "bag": [32, 38, 42, 45, 46, 49, 53, 56, 57], "bare_command": [19, 49], "base": [0, 1, 2, 15, 18, 19, 31, 32, 34, 35, 36, 37, 40, 42, 51, 52, 53, 54, 55, 56, 57, 61, 62, 63, 64, 65, 66, 71], "basic": [10, 11, 12, 16, 61, 62], "basic_featur": 11, "batch": 67, "batch_num": 1, "batch_siz": 67, "bay": [56, 57], "bbevi": 18, "becaus": [1, 2, 12, 21, 31, 36, 40, 42, 56, 61], "becom": [44, 61, 62], "been": [1, 12, 16, 31, 36, 61], "befor": [0, 1, 2, 17, 31, 36, 45, 48], "beforehand": 64, "begin": [34, 42, 54, 58, 61, 62, 63], "behavior": [0, 1, 11, 62, 63], "being": [4, 13, 14, 16, 17, 20, 21, 24, 31, 32, 36, 43, 47, 51, 55, 56, 60], "belong": 1, "below": [1, 11, 21, 33, 36, 45, 48, 51, 61, 62, 67, 69], "ber": 54, "bert": [0, 1, 31, 35, 36, 39, 46, 61, 64, 67], "bert_path": 67, "bert_sentiment_data": [1, 61, 64], "best": 29, "better": [31, 61], "between": [4, 6, 13, 21, 23, 24, 28, 30, 31, 34, 35, 36, 37, 40, 45, 46, 55, 58, 59, 62, 64, 65, 67], "betwen": 34, "beyond": 2, "big": 59, "binari": [10, 32, 38], "blame": 47, "blob": [1, 24, 61, 67], "block": [22, 32, 48, 59], "blog": 15, "bodi": 67, "bold": [22, 64], "bool": [2, 63, 65, 66, 67, 71], "boolean": 1, "bootstrap": 62, "both": [0, 1, 2, 42, 52, 54, 55, 59, 62], "bother": 50, "bottom": 59, "bought": 41, "bound": [29, 35, 36, 37, 
52, 55], "boundari": [34, 35, 42], "boyd": [0, 42], "break": [22, 48, 64], "brief": 44, "bring": 0, "broader": 52, "broken": 59, "btw": 50, "bug": [1, 61], "build": [1, 7, 34, 45, 46, 62], "built": [1, 11, 42, 67], "built_spacy_n": 15, "bullet": [22, 48, 64], "bunch": 59, "burst": 58, "bursti": [1, 11, 39, 58, 61, 65], "by_the_wai": 49, "c": [12, 34, 35, 45, 46, 62], "cach": [0, 2, 51, 61], "calcul": [1, 2, 5, 11, 12, 16, 18, 21, 28, 33, 41, 48, 49, 50, 56, 57, 58, 60, 62, 63, 64, 65, 66, 67, 68, 72, 73], "calculate_chat_level_featur": [1, 61, 69], "calculate_conversation_level_featur": 69, "calculate_hedge_featur": 64, "calculate_id_scor": 13, "calculate_info_divers": 65, "calculate_named_ent": 15, "calculate_num_question_na": 20, "calculate_politeness_senti": 64, "calculate_politeness_v2": 64, "calculate_team_bursti": 65, "calculate_textblob_senti": 64, "calculate_user_level_featur": 69, "calculate_vector_word_mimicri": 64, "calculate_word_mimicri": 64, "call": [1, 2, 8, 11, 13, 61, 62, 64, 69], "can": [0, 1, 2, 11, 23, 31, 32, 33, 34, 36, 37, 42, 43, 44, 47, 48, 49, 50, 52, 54, 60, 61, 62, 67, 69], "can_you": 49, "cannot": [1, 31, 36, 45, 46, 49, 62], "cao": [21, 24, 33, 43, 44, 56, 57, 62], "cap": [22, 48, 64], "capit": [0, 2, 48], "captur": [29, 30, 32, 34, 35, 38, 41, 42, 55], "caract": 40, "cardiffnlp": [1, 61], "care": 1, "carefulli": 60, "carri": 31, "casa_token": 5, "case": [1, 13, 16, 28, 29, 30, 31, 36, 37, 41, 45, 46, 51, 55, 56, 59, 61], "casual": 43, "categori": [21, 32, 42, 45, 46, 49, 52, 67], "caus": [31, 32, 36, 59], "caveat": 42, "center": 62, "central": 34, "centroid": [34, 66], "certain": [5, 19, 30, 42, 45, 46, 49, 71], "certainli": 42, "certainti": [11, 38, 39, 42, 64, 67], "cfm": 4, "chall": [1, 21, 39, 64, 70], "chang": [0, 1, 34, 50, 61, 71], "charact": [1, 2, 3, 15, 19, 37, 42, 49, 62, 64, 65, 66, 67, 71], "characterist": [1, 62], "chat": [0, 1, 2, 4, 5, 6, 7, 8, 12, 13, 14, 16, 23, 25, 28, 29, 32, 35, 36, 41, 44, 45, 46, 49, 59, 61, 63, 
64, 65, 66, 67, 68, 69, 71, 72, 73], "chat_data": [2, 6, 7, 8, 26, 27, 28, 63, 64, 65, 66, 67, 71], "chat_df": 14, "chat_featur": [1, 42, 61, 65, 66], "chat_level_data": 72, "chat_level_featur": 2, "chatlevelfeaturescalcul": [1, 2, 17, 21, 61, 64, 69], "chats_data": 73, "check": [19, 23, 44, 64, 67, 71], "check_embed": [1, 61, 69], "chen": 62, "choic": 1, "choos": [1, 60], "chose": 1, "chronolog": 1, "chunk": [34, 59, 63], "chunk_num": 63, "circlelyt": 13, "citat": [21, 24], "cite": 50, "clarif": [16, 32, 64], "class": [1, 2, 31, 61, 62, 64, 65, 66], "classif": [21, 64], "classifi": [16, 21, 50, 56, 57], "classify_ntri": 16, "classify_text_dalechal": 21, "clean": [2, 17, 19, 67, 71], "clean_text": 19, "clear": 1, "close": [31, 42, 48, 62], "closer": [45, 46, 59], "clue": 62, "cmu": 12, "code": [6, 18, 29, 32, 51, 55, 61, 62, 68], "coeffici": [1, 4, 39, 62, 65, 68], "coerce_to_date_or_numb": 23, "cognit": 62, "col": 2, "colab": [0, 1], "collabor": [59, 62], "collaps": 2, "collect": [1, 2, 34, 49, 50, 52, 61, 62], "colleg": 33, "column": [0, 2, 4, 6, 7, 8, 9, 12, 13, 14, 16, 18, 23, 25, 28, 42, 51, 56, 62, 63, 64, 65, 66, 67, 68, 71, 72, 73], "column_count_frequ": 28, "column_count_mim": 28, "column_mimc": 28, "column_nam": 71, "column_to_summar": 72, "com": [1, 2, 4, 5, 13, 15, 18, 64, 67, 68, 71], "comb": 62, "combin": [0, 1, 6, 28, 64, 71], "come": [1, 12, 13, 21, 32, 33, 42, 58, 61], "comm": [1, 61], "command": [1, 61], "comment": 48, "commit": 23, "commit_data": 19, "common": [0, 32, 62, 64], "commonli": 37, "commun": [0, 1, 11, 42, 44, 48, 55, 60, 62, 64], "companion": 1, "compar": [31, 35, 44, 45, 52, 64, 71, 73], "compat": [0, 1, 61], "complement": [31, 36], "complet": [1, 2, 31, 55], "complex": [0, 35, 43, 50, 62], "compon": 50, "comprehens": [33, 48], "compress": 71, "comput": [0, 2, 4, 5, 6, 10, 11, 12, 13, 14, 28, 29, 30, 31, 34, 35, 36, 37, 40, 41, 42, 45, 46, 49, 50, 52, 55, 62, 64, 65, 66, 69, 73], "compute_frequ": 28, "compute_frequency_per_conv": 28, 
"compute_vectors_from_preprocess": [0, 2], "computetf": 28, "conain": 61, "concat_bert_featur": [1, 61, 64], "concaten": [19, 49, 64, 71], "concentr": 55, "concept": [29, 39, 42, 62], "conceptu": [61, 62], "concis": 43, "concret": 29, "conduct": 1, "confid": [2, 5, 15, 30, 47, 64], "conflict": 62, "confound": 44, "congruent": 34, "conjection_seper": 19, "conjunct": [19, 31, 36, 49], "conjunction_start": 49, "connect": 39, "conscious": 35, "consecut": 22, "consequ": [0, 1], "consid": [1, 2, 33, 37], "consider": [61, 62], "consist": [31, 36, 40, 41], "constitut": 41, "constrain": [34, 35], "construct": [1, 11, 55, 62], "constructor": 47, "consult": 5, "contact": 0, "contain": [1, 2, 3, 4, 6, 7, 8, 9, 10, 12, 13, 14, 15, 16, 17, 18, 19, 23, 25, 28, 29, 30, 35, 38, 42, 47, 49, 50, 55, 61, 62, 63, 64, 67, 71, 72, 73], "content": [0, 1, 12, 13, 28, 34, 36, 39, 41, 42, 45, 46, 62, 64, 67], "content_mimicry_scor": 28, "content_mimicry_score_per_conv": 28, "content_word_accommod": 31, "content_word_accommodation_per_conv": 31, "content_word_mimicri": 28, "contentcod": 67, "contentcodingdictionari": 67, "context": [2, 32, 42, 48, 62, 71], "continu": [56, 57], "contract": 49, "contrast": 39, "contribut": [13, 34, 37, 62], "control": 1, "conv": [1, 61], "conv_data": [2, 65], "conv_features_al": [1, 61], "conv_features_bas": [1, 11, 61], "conv_level_featur": 2, "conv_to_float_arr": 8, "convei": [6, 34, 52], "conveni": [1, 61], "convers": [0, 2, 3, 4, 6, 7, 8, 9, 12, 13, 23, 25, 28, 29, 31, 34, 35, 36, 37, 40, 41, 43, 44, 45, 46, 47, 48, 49, 50, 52, 55, 58, 59, 61, 63, 64, 65, 66, 68, 71, 72, 73], "conversation_id": [2, 28, 61, 71], "conversation_id_col": [0, 1, 2, 4, 6, 7, 8, 9, 13, 23, 25, 26, 27, 61, 63, 64, 65, 66, 68, 71, 72, 73], "conversation_num": [0, 1, 2, 6, 7, 64, 66, 73], "conversationlevelfeaturescalcul": [2, 65, 69], "convert": [8, 41, 49, 67, 71], "convict": 5, "convo_aggreg": [0, 1, 2, 65], "convo_column": [0, 1, 2, 65], "convo_method": [0, 1, 2, 65], "convokit": 
[17, 50, 62, 64], "coordin": 55, "copi": [0, 1, 42], "copular": [31, 36], "core": [34, 69], "cornel": 17, "corpu": [0, 1, 50], "corrado": 37, "correl": [41, 55], "correspond": [30, 34, 35, 40, 49, 55, 66], "cosin": [6, 7, 13, 28, 31, 34, 35, 36, 40, 45, 46, 65], "could": [1, 31, 33, 36, 50, 54], "could_you": 49, "couldn": [31, 36], "count": [0, 1, 3, 12, 14, 15, 16, 19, 21, 25, 28, 30, 31, 32, 36, 39, 41, 43, 44, 49, 52, 53, 54, 56, 58, 64, 65, 66], "count_all_cap": 22, "count_bullet_point": 22, "count_charact": 3, "count_difficult_word": 21, "count_ellips": 22, "count_emoji": 22, "count_emphasi": 22, "count_line_break": 22, "count_link": 22, "count_match": [19, 49], "count_messag": 3, "count_numb": 22, "count_parenthes": 22, "count_quot": 22, "count_responding_to_someon": 22, "count_spacy_match": 19, "count_syl": 21, "count_turn": 25, "count_turn_taking_index": 25, "count_user_refer": 22, "count_word": 3, "countabl": [1, 65], "countd": 36, "counterfactu": 50, "cours": [16, 31, 34, 36, 63], "cover": 28, "creat": [0, 1, 2, 13, 19, 31, 40, 42, 61, 62, 64, 65, 66, 71], "create_chunk": 63, "create_chunks_messag": 63, "create_cumulative_row": 71, "credit": 33, "criteria": 67, "crowd": 13, "csv": [1, 2, 61, 62, 67], "cumul": [2, 71], "cumulative_group": [0, 1, 2, 71], "current": [1, 11, 23, 31, 34, 35, 36, 40, 45, 46, 58, 61, 64, 71], "curt": 43, "custom": [0, 2, 11, 14, 62], "custom_featur": [0, 1, 2, 61], "custom_liwc_dictionari": [14, 64], "custom_liwc_dictionary_path": [0, 2, 42], "customiz": 62, "cut": 1, "cutoff": [2, 15, 47, 64], "d": [0, 1, 2, 31, 34, 36, 61], "dale": [1, 21, 39, 64, 70], "dale_chall_help": 21, "danescu": [49, 50], "dash": 22, "data": [0, 2, 6, 7, 8, 9, 13, 19, 20, 32, 37, 40, 41, 47, 51, 55, 61, 62, 63, 64, 65, 66, 67, 68, 71, 72, 73], "datafram": [0, 1, 2, 4, 6, 7, 8, 9, 12, 13, 14, 15, 16, 17, 18, 19, 23, 25, 28, 37, 42, 47, 49, 59, 61, 62, 63, 64, 65, 66, 67, 68, 71, 72, 73], "dataknowsal": 15, "dataset": [1, 2, 9, 12, 13, 28, 31, 41, 47, 52, 
61, 64, 65, 66, 73], "date": [1, 42, 61], "datetim": [23, 58], "dcosta": 62, "deal": [50, 59], "death": 1, "debat": 59, "decid": 62, "decis": [1, 13, 62], "declar": [1, 62, 69], "deepli": 62, "default": [0, 1, 2, 5, 11, 13, 16, 23, 30, 34, 35, 42, 47, 62, 63, 64, 66, 67, 71, 73], "defer": [17, 50], "defin": [0, 11, 21, 31, 34, 36, 40, 59, 62, 64, 65, 66, 70], "definit": [1, 3, 44], "degre": [6, 30, 36, 45, 46, 55], "delet": 29, "deliber": 1, "demo": 61, "democrat": 1, "demystifi": 62, "denomin": 59, "denot": 42, "densiti": 60, "dep_": 49, "dep_pair": 19, "depend": [0, 1, 10, 19, 32, 49, 52, 61, 63], "deriv": [2, 11, 65, 66], "descend": 67, "describ": [1, 11, 62], "descript": [1, 61], "design": [0, 1, 2, 13, 34, 62], "desir": [2, 63, 72], "detail": [0, 11, 33, 41, 43, 61, 62], "detect": [1, 32, 37, 38, 47, 48, 49, 54], "determin": [13, 18, 31, 35, 36, 40, 45, 46, 71], "dev": 24, "develop": [5, 37, 40, 62], "deviat": [4, 5, 29, 40, 41, 55, 58, 65, 72, 73], "df": [4, 8, 9, 12, 13, 16, 18, 23, 28, 63, 71], "dic": [2, 14, 42, 67], "diccategori": 67, "dict": [2, 14, 17, 19, 28, 64, 67, 71], "dicterm": 67, "dictext": 67, "dictionari": [0, 1, 2, 14, 15, 17, 19, 28, 30, 42, 49, 61, 64, 67, 71], "did": [1, 31, 36, 37, 47, 50, 54, 62], "didn": [31, 36], "differ": [0, 1, 2, 4, 11, 12, 23, 28, 29, 31, 34, 36, 37, 39, 40, 44, 45, 46, 47, 49, 55, 62, 63, 64, 65, 66, 67, 71], "differenti": [49, 59], "difficult": [21, 33], "difficult_word": 21, "difficulti": 33, "dimens": [40, 62], "dimension": [34, 35], "dinner": 41, "direct": [34, 43, 45, 47, 50, 69], "direct_quest": [32, 50, 54], "direct_start": 50, "directli": [1, 62, 69], "directori": [0, 2, 19, 61, 65, 67], "disabl": 1, "disagr": 49, "disagre": 51, "discours": [31, 36], "discret": [31, 36, 45, 46], "discurs": [0, 1, 6, 8, 39, 40, 61, 65, 66], "discursive_divers": 11, "discus": 8, "discuss": [0, 1, 31, 34, 39, 40, 42, 43, 61, 62, 71], "dispers": 68, "displai": [1, 34, 42, 46, 61], "dispos": 1, "distanc": [34, 35, 40], 
"distinct": [31, 36, 59], "distinguish": 59, "distribut": 31, "div": 16, "diverg": [6, 34, 35], "divers": [0, 1, 6, 8, 13, 39, 61, 65], "divid": [16, 34, 59, 63], "dl": [21, 24], "do": [0, 1, 29, 31, 34, 36, 37, 43, 49, 50, 54, 62, 69], "doc": [2, 19], "doc_top": 13, "document": [1, 17, 61, 69], "doe": [1, 2, 29, 40, 42, 43, 45, 47, 54, 61, 71], "doesn": [0, 1, 29, 31, 36, 42, 45, 61, 67], "doi": [5, 6, 21, 24, 64], "domain": [31, 50], "don": [31, 36, 49, 54, 62, 67], "done": [2, 50], "dot": 22, "doubl": 30, "down": [31, 36], "download": [1, 61], "download_resourc": [1, 61], "downstream": [17, 62], "dozen": 62, "drive": [62, 69], "driver": [2, 61, 64, 65, 66], "drop": [0, 2, 64], "due": [34, 59], "duncan": 62, "duplic": [1, 2, 71], "durat": [58, 63], "dure": [2, 55, 59, 62], "dynam": [59, 61], "e": [0, 1, 2, 4, 15, 20, 29, 30, 31, 32, 34, 35, 36, 37, 38, 41, 42, 47, 48, 49, 52, 54, 56, 59, 61, 63, 65, 66, 67, 71], "e2": [21, 70], "each": [0, 1, 2, 3, 4, 7, 8, 9, 11, 12, 15, 17, 19, 23, 25, 28, 30, 31, 34, 35, 36, 37, 40, 41, 42, 44, 45, 46, 47, 49, 50, 51, 52, 55, 59, 61, 62, 63, 64, 65, 66, 67, 68, 69, 71, 72, 73], "earlier": [0, 1, 2, 42], "easi": [1, 21, 62, 70], "easier": 21, "easili": 33, "easy_word": 21, "eat": 34, "echo": 31, "econom": 37, "edg": [29, 59], "edu": [1, 12, 16, 17, 70], "effect": [1, 41], "effici": 1, "effort": 55, "either": [20, 23, 52, 55], "elaps": [23, 58], "element": [1, 6, 67], "ellips": [22, 48, 64], "els": [1, 22, 47, 64], "embed": [8, 31, 34, 35, 36, 45, 46, 65, 66, 67, 69], "emili": [30, 35, 45, 46, 47, 59, 62], "emoji": [22, 42, 48, 64, 67, 71], "emot": [1, 61], "emoticon": 48, "emphas": [22, 48, 64], "emphasi": 48, "empirica": [1, 2, 71], "emploi": 45, "empti": [0, 2, 13, 64, 67], "en": [1, 21, 24, 61, 70], "en_core_web_sm": [1, 61], "enabl": 71, "enclos": 22, "encod": [1, 8], "encompass": 62, "encount": [1, 34, 35, 61], "encourag": 64, "end": [0, 1, 15, 20, 23, 34, 42, 54, 62, 63, 67], "engag": 43, "engin": 2, "english": [34, 42], 
"enjoi": 62, "ensur": [0, 1, 40, 42, 49, 61, 63, 67, 71], "entir": [0, 1, 12, 28, 31, 36, 40, 41, 52, 59, 62, 73], "entiti": [0, 2, 15, 39, 64], "entityrecogn": 47, "entri": [1, 28, 61], "ep8dauru1ogvjurwdbof5h6ayfbslvughjyiv31d_as6ppbt": 5, "equal": [1, 21, 23, 34, 37, 40, 55, 59, 61, 62, 63], "equival": [0, 1, 41, 55, 61], "eric": 62, "error": [1, 16, 61, 71], "escap": 42, "especi": [41, 62], "essenti": [51, 71], "establish": 31, "estim": 31, "et": [1, 5, 16, 18, 21, 24, 30, 31, 32, 33, 34, 35, 36, 38, 42, 43, 44, 49, 50, 52, 53, 54, 56, 57, 58, 59, 60, 64], "etc": [10, 15, 16, 17, 42], "evalu": [5, 47, 50], "evan": 62, "even": [0, 1, 2, 34, 37, 42, 62, 63, 67], "evenli": [34, 55], "event": [1, 34, 55, 61], "ever": 62, "everi": [1, 4, 13, 31, 34, 35, 36, 61, 62], "everybodi": [31, 36], "everyon": [31, 36, 47, 62], "everyth": [31, 36, 56], "everywher": [31, 36], "evolut": 35, "evolv": [35, 71], "exactli": [1, 2, 71], "examin": [40, 62, 63], "exampl": [0, 10, 11, 15, 21, 24, 29, 31, 32, 34, 37, 42, 43, 48, 50, 51, 54, 56, 59, 60, 61, 62, 67], "example_data": 1, "exce": 15, "except": [42, 67, 71], "exchang": [12, 35, 39, 40, 45, 55, 64], "exclud": [0, 41, 42], "exclus": [41, 42], "excus": 32, "exhibit": 35, "exist": [0, 1, 2, 55, 61, 62, 63, 64, 67], "expand": 49, "expect": [1, 37, 42, 47], "expected_valu": 47, "explain": [0, 29], "explan": [29, 43], "explor": [61, 62], "express": [5, 14, 30, 31, 32, 36, 38, 42, 64, 67], "extend": 1, "extens": [43, 44], "extent": [1, 4, 7, 12, 31, 34, 35, 37, 51, 55, 59, 61], "extern": 48, "extra": 51, "extract": [1, 17, 19, 28, 40, 50, 64], "extrem": [55, 56, 57], "face": [1, 51, 61], "facilit": [62, 71], "fact": [4, 35, 50, 54, 59], "factual": [17, 24, 50], "fail": [1, 61], "fals": [0, 1, 2, 31, 54, 61, 67, 71], "famili": 42, "far": [34, 35, 46, 50, 62], "faster": 14, "feat_count": 19, "featur": [0, 2, 3, 4, 5, 6, 7, 8, 9, 10, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 63, 64, 65, 66, 67], "feature_build": 
[0, 1, 42, 61], "feature_dict": [1, 61], "feature_method": [64, 65], "feature_nam": [1, 61], "featurebuild": [0, 2, 11, 42, 47, 69], "features_conceptu": [1, 61], "feauturebuild": 1, "few": [48, 62], "fewer": [12, 60], "fflow": 11, "field": [13, 17], "file": [0, 2, 12, 14, 19, 42, 61, 65, 67], "filenam": [1, 2, 19], "filenotfounderror": 67, "filler": [37, 60], "filler_paus": 49, "filter": [19, 62], "final": [1, 2, 34, 42, 62], "find": [1, 19, 28, 50], "fingertip": 62, "finit": 55, "first": [0, 2, 11, 12, 16, 19, 31, 34, 35, 36, 39, 40, 41, 42, 45, 46, 49, 52, 54, 59, 61, 62, 64, 67, 70, 71], "first_person": 12, "first_person_plur": 49, "first_person_raw": [12, 16], "first_person_singl": 49, "five": 37, "fix": [52, 67], "fix_abbrevi": 67, "flag": 71, "float": [0, 2, 4, 5, 6, 8, 10, 13, 14, 16, 21, 24, 25, 28, 68], "floor": 59, "flow": [0, 1, 7, 31, 36, 39, 41, 45, 46, 61, 64], "focal": [31, 36], "focu": 41, "folder": [0, 1, 19], "follow": [0, 1, 2, 11, 16, 17, 29, 31, 32, 33, 41, 42, 47, 49, 50, 53, 55, 59, 60, 61, 64, 65, 67], "for_m": 49, "for_you": 49, "forc": [0, 1, 61], "form": 1, "formal": [1, 61], "formal_titl": 49, "format": [0, 1, 8, 17, 22, 42, 47, 48, 61, 62, 64, 67], "former": [45, 46], "formula": [33, 42, 59, 64, 70], "fornt": 1, "forward": [0, 1, 7, 39, 41, 61, 64], "forward_flow": 35, "found": [1, 2, 5, 28, 30, 33, 61, 69], "four": [1, 8], "fourth": 33, "frac": 55, "fraction": 59, "framework": [49, 50, 62], "frequenc": [28, 31, 44, 64], "frequency_dict": 28, "fridai": 34, "from": [0, 1, 2, 4, 5, 6, 8, 9, 10, 11, 12, 13, 14, 16, 19, 21, 28, 29, 31, 32, 33, 34, 35, 36, 39, 41, 42, 49, 50, 51, 53, 55, 56, 57, 58, 61, 62, 64, 65, 66, 67, 71], "full": [1, 28, 37], "full_empirical_dataset": 1, "fulli": [32, 48], "functinon": 12, "function": [1, 2, 3, 4, 10, 11, 12, 13, 14, 16, 20, 21, 23, 28, 31, 39, 44, 45, 46, 50, 56, 57, 60, 61, 62, 63, 64, 65, 66, 67, 69, 70, 71, 72, 73], "function_mimic_word": 28, "function_mimicry_scor": 28, "function_word_mimicri": 
28, "function_word_refer": 28, "fund": 62, "further": [1, 61, 71], "furthermor": 42, "futur": [23, 66], "g": [0, 1, 2, 4, 15, 20, 29, 31, 32, 36, 37, 38, 41, 42, 47, 48, 52, 54, 59, 61, 63, 65, 66, 67, 71], "game": [1, 2, 59, 71], "gaug": [33, 52], "gener": [0, 2, 9, 11, 12, 16, 21, 31, 34, 35, 36, 40, 42, 45, 46, 49, 51, 59, 65, 66, 67, 69, 71, 72], "generaliz": 23, "generate_bert": 67, "generate_certainty_pkl": 67, "generate_lexicon_pkl": 67, "generate_vect": 67, "gensim": 40, "get": [16, 20, 21, 28, 30, 31, 36, 49, 66, 67], "get_all_dd_featur": 11, "get_centroid": 66, "get_certainti": 5, "get_certainty_scor": 64, "get_content_words_in_messag": 28, "get_conversation_level_aggreg": 65, "get_cosine_similar": 6, "get_dale_chall_easy_word": [21, 70], "get_dale_chall_score_and_classf": 64, "get_dd": 6, "get_dd_featur": 8, "get_dep_pair": [19, 49], "get_dep_pairs_noneg": [19, 49], "get_discursive_diversity_featur": 65, "get_first_pct_of_chat": 2, "get_first_person_word": [12, 70], "get_forward_flow": [7, 64], "get_function_word": 70, "get_function_words_in_messag": 28, "get_gini": 68, "get_gini_featur": 65, "get_info_divers": 13, "get_info_exchange_wordcount": 12, "get_liwc_count": 14, "get_max": 72, "get_mean": 72, "get_median": 72, "get_mimicry_bert": 28, "get_min": 72, "get_moving_mimicri": 28, "get_named_ent": 64, "get_nan_vector": [27, 67], "get_polarity_scor": 24, "get_politeness_strategi": 17, "get_politeness_v2": 18, "get_proportion_first_pronoun": 16, "get_question_word": 70, "get_reddit_featur": 64, "get_senti": 67, "get_stdev": 72, "get_subjectivity_scor": 24, "get_sum": 72, "get_team_bursti": 4, "get_temporal_featur": [4, 64], "get_time_diff": 23, "get_time_diff_startend": 23, "get_turn": 25, "get_turn_id": 71, "get_turn_taking_featur": 65, "get_unique_pairwise_combo": 6, "get_user_level_aggreg": 65, "get_user_level_summary_statistics_featur": 66, "get_user_level_summed_featur": 66, "get_user_max_datafram": 72, "get_user_mean_datafram": 72, 
"get_user_median_datafram": 72, "get_user_min_datafram": 72, "get_user_network": [11, 66], "get_user_stdev_datafram": 72, "get_user_sum_datafram": 72, "get_variance_in_dd": 26, "get_within_person_disc_rang": 27, "get_word_ttr": 16, "get_zscore_across_all_chat": 73, "get_zscore_across_all_convers": 73, "gina": 62, "gini": [1, 39, 62, 65, 68], "gini_coeffici": [11, 69], "github": [0, 1, 2, 18, 67, 71], "give": [0, 1, 29, 37], "give_ag": 49, "given": [0, 1, 5, 6, 13, 14, 28, 30, 31, 33, 34, 35, 36, 40, 41, 55, 59, 66, 67, 71], "go": [1, 34, 35, 45, 46, 50, 62], "goal": 62, "goe": 67, "good": [50, 56, 62], "goodby": 49, "googl": [0, 1], "got": [31, 36], "gotta": [31, 36], "grade": 33, "grader": 21, "grai": 35, "grammat": 36, "granularli": 35, "grate": [42, 62], "gratitud": [17, 49, 50], "great": [47, 50, 51, 56, 59, 60, 62], "greater": 55, "greet": 50, "groceri": 41, "group": [0, 2, 4, 13, 29, 33, 34, 41, 52, 59, 62, 68, 71, 72], "grouping_kei": [0, 1, 2, 71], "gt": 22, "guess": 10, "gun": 1, "gy": 15, "gym": 34, "ha": [0, 1, 32, 34, 35, 37, 42, 43, 46, 52, 54, 55, 56, 59, 61, 62, 63, 67, 71], "had": [1, 31, 36, 54, 61], "hadn": [31, 36], "handl": [19, 29, 71], "happen": [1, 2, 55, 62, 63], "happi": 42, "hardcod": 67, "harder": 21, "hashedg": [17, 50], "hasn": [31, 36], "hasneg": 50, "hasposit": 50, "hate": 31, "have": [0, 1, 10, 12, 16, 31, 34, 36, 37, 40, 41, 42, 45, 46, 50, 54, 59, 60, 61, 62, 71], "haven": [31, 36], "he": [1, 31, 36], "header": [18, 67], "hear": 32, "heart": [61, 62], "heat": 1, "heavi": 62, "hedg": [11, 30, 39, 49, 50, 64], "hei": [1, 35, 45, 46, 50], "helena": [47, 62], "hello": [0, 43, 49], "help": [0, 31, 34, 36, 43, 45, 46, 52, 58, 69], "helper": [23, 67], "her": [30, 31, 36], "here": [1, 29, 31, 34, 41, 42, 47, 61, 62, 66], "herself": [31, 36], "hesit": [60, 64], "hi": [31, 35, 36, 43, 45, 46], "hierach": 71, "hierarch": 71, "high": [0, 1, 2, 61, 62, 71], "higher": [0, 1, 21, 31, 34, 36, 40, 41, 42, 44, 45, 46, 55, 60], "highest": 71, 
"highlight": 1, "him": [31, 36], "himself": [31, 36], "hmm": [31, 36], "hoc": 62, "hold": 31, "hole": 62, "home": 42, "homework": 34, "homonym": 31, "hood": 1, "hope": 35, "host": [45, 46], "hour": 48, "how": [1, 5, 28, 29, 30, 31, 34, 35, 36, 39, 43, 45, 51, 52, 54, 56, 62], "howev": [0, 1, 3, 11, 35, 40, 42, 44, 54, 56, 61, 62], "howitwork": 1, "html": [1, 2, 15, 17, 24, 61], "http": [1, 2, 4, 5, 6, 12, 13, 15, 16, 17, 18, 21, 24, 41, 45, 46, 47, 61, 64, 67, 68, 70, 71], "hu": [1, 42, 62], "hug": [1, 51, 61], "huggingfac": 1, "huh": [31, 32, 36], "human": [37, 50, 62], "hyperlink": 48, "hyphen": [1, 42, 61, 67], "i": [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 16, 19, 20, 21, 22, 23, 24, 25, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 71, 73], "iby1": 5, "id": [2, 4, 7, 23, 28, 62, 64, 66, 68, 71, 72, 73], "idea": [12, 35, 40, 47, 51], "ident": [34, 35], "identif": 1, "identifi": [0, 1, 2, 4, 8, 9, 15, 23, 25, 30, 31, 41, 47, 50, 52, 61, 63, 64, 71, 72], "identiif": [13, 71], "ignor": [1, 32], "illustr": [1, 41, 48, 62], "imagin": 1, "immedi": [31, 35, 64], "impact": [1, 60], "impersonal_pronoun": 49, "implement": 64, "impli": 37, "import": [31, 32, 36, 44, 45, 62, 69], "incent": 13, "includ": [0, 1, 2, 10, 17, 22, 31, 32, 35, 36, 42, 45, 46, 51, 52, 56, 61, 62, 66, 71], "inclus": [13, 71], "incongru": [8, 34], "incorpor": [1, 42, 45, 46], "increas": [1, 62], "incredibli": 42, "increment": 71, "independ": 1, "index": [1, 2, 4, 13, 25, 37, 39, 55, 61, 65], "indic": [1, 2, 16, 21, 22, 30, 32, 34, 35, 36, 40, 41, 43, 44, 48, 49, 50, 52, 55, 60, 63, 71], "indirect": 50, "indirect_btw": 50, "indirect_greet": 50, "indirectli": 69, "individu": [0, 1, 5, 11, 31, 34, 37, 45, 50, 59, 60, 62, 72], "inequ": 37, "infer": [1, 51, 67], "influenc": 1, "info": [13, 18, 64], "info_divers": 13, "info_exchang": 64, "info_exchange_wordcount": [41, 64], 
"info_exchange_zscor": 11, "inform": [0, 6, 11, 12, 13, 24, 32, 34, 39, 48, 62, 64, 65], "informal_titl": 49, "information_divers": 11, "initi": [2, 62, 63, 64, 65, 66], "input": [0, 2, 4, 6, 12, 13, 14, 15, 16, 19, 20, 22, 28, 42, 50, 55, 60, 62, 63, 64, 65, 66, 67, 71, 72], "input_data": [25, 68, 72], "input_df": [1, 2, 42, 61, 71], "inquiri": [0, 30, 39, 52], "insid": 1, "insight": 1, "inspir": 15, "instal": [1, 61, 62], "instanc": [1, 22, 50, 59, 66], "instanti": 2, "insteac": 1, "instead": [1, 62], "instruct": [1, 61], "int": [2, 3, 10, 13, 15, 16, 19, 20, 22, 28, 63, 64, 67], "intact": 71, "integ": [0, 13, 40, 47], "intend": 59, "interact": [1, 11, 43, 44, 62, 69], "interconnect": 62, "interest": [1, 61, 62], "interfac": 62, "intermedi": [59, 64], "intern": 29, "interpret": [0, 1, 23], "interrupt": 59, "interv": [58, 65], "introduc": [42, 62], "introduct": [11, 61], "invalid": 67, "invers": 64, "involv": [41, 62, 65], "io": [1, 24, 47, 61], "ipynb": [0, 1], "is_hedged_sentence_1": 10, "is_valid_term": 67, "isn": [1, 31, 36], "issu": [1, 31, 36, 37, 42, 61], "ital": 64, "italic": 22, "item": [0, 71], "its": [0, 15, 31, 35, 36, 40, 41, 47, 54, 55, 64, 69], "itself": [23, 31, 36, 44], "jami": [0, 42], "john": 1, "jonson": 62, "journal": [5, 64], "json": [1, 61], "jurafski": 70, "juri": 1, "juries_df": 1, "jury_conversations_with_outcome_var": 1, "jury_feature_build": 1, "jury_output": 1, "jury_output_chat_level": [1, 61], "jury_output_turn_level": 1, "just": [1, 2, 31, 36, 46, 50, 59, 61, 62], "katharina": 34, "keep": [1, 71], "kei": [1, 2, 4, 19, 28, 30, 54, 61, 71], "keyerror": 71, "keyword": [19, 49], "kind": [10, 62], "kitchen": 42, "knob": 0, "know": [1, 30], "knowledg": 29, "known": [1, 32, 61], "kumar": 62, "kw": 19, "l714": 67, "l81": 67, "lab": [1, 2, 62, 71], "label": [1, 15, 21, 51], "lack": [31, 38, 45, 46], "languag": [15, 31, 34, 42, 50, 62], "larg": [1, 31, 69], "larger": [0, 31, 61], "last": [1, 31], "late": 32, "later": [0, 1, 2, 42, 61], 
"latest": [1, 61], "latter": [31, 36], "lda": [13, 40], "learn": [1, 61, 62], "least": [10, 32, 42, 63, 67], "led": 62, "legal": 49, "lemmat": [13, 40], "len": 28, "length": [35, 39, 41, 42, 44, 67], "less": [1, 13, 32, 50, 52, 55, 62, 63], "let": [41, 49, 53], "let_me_know": 49, "letter": [49, 71], "level": [0, 1, 2, 3, 4, 6, 7, 8, 9, 12, 13, 14, 16, 23, 61, 64, 65, 66, 71, 72], "lexic": [1, 10, 12, 14, 16, 31, 32, 36, 42, 60, 62, 64], "lexical_featur": [14, 64], "lexical_features_v2": [10, 11], "lexicon": [0, 5, 10, 14, 30, 39, 50, 52, 67, 69], "lexicons_dict": 67, "librari": [34, 51, 56, 57], "lift": 62, "light": 61, "like": [1, 22, 31, 34, 36, 41, 50, 61, 62], "limiat": 32, "limit": [11, 32, 37, 42, 54], "line": [0, 1, 19, 22, 48, 61, 62, 64, 67], "linear": 64, "linguist": [0, 18, 19, 30, 39, 50, 52], "link": [22, 29, 48, 50, 64], "list": [1, 2, 6, 7, 10, 11, 12, 13, 15, 19, 20, 21, 22, 28, 31, 33, 36, 37, 42, 48, 49, 50, 53, 54, 61, 64, 65, 66, 67, 68, 70, 71], "liter": 42, "literatur": 62, "littl": 38, "littlehors": 1, "liu": [42, 52], "live": [1, 54], "liwc": [0, 2, 14, 30, 39, 51, 52, 56, 62, 64, 67], "liwc2015": 42, "liwc_featur": [10, 14], "liwc_test_output": 42, "lix": 34, "ll": [1, 31, 36, 61], "load": [2, 19, 67, 69], "load_custem_liwc_dict": 2, "load_liwc_dict": 67, "load_saved_data": 19, "load_to_dict": 19, "load_to_list": 19, "loc": 15, "local": [1, 42, 51, 61], "locat": [1, 62], "long": 4, "longer": [30, 41, 43, 48, 61, 62], "look": [2, 34, 61, 65, 66], "loos": 36, "lot": [31, 36], "loud": 60, "love": [31, 56], "low": [1, 2, 29, 55, 60, 71], "lower": [0, 1, 21, 31, 33, 36, 41, 44, 55, 60], "lowercas": [2, 13, 40, 48, 49, 71], "lowest": 71, "lpearl": 16, "lst": 6, "m": [0, 2, 23, 30, 31, 36], "made": [1, 23, 35, 59, 61, 62], "magnitud": 55, "mai": [1, 2, 11, 28, 31, 32, 35, 36, 37, 41, 42, 43, 44, 54, 61, 62, 71], "main": [1, 2, 5, 62, 64, 65, 66, 67], "make": [1, 5, 31, 34, 55, 56, 62, 66, 69, 71], "man": 62, "mani": [1, 4, 11, 32, 37, 41, 60, 62, 
66], "manner": [55, 62], "manual": [1, 61], "map": [13, 34, 67], "mark": [19, 20, 22, 43, 54, 64, 71], "marker": [18, 32, 39, 42, 50, 51, 52, 54, 56], "marlow": 44, "matarazzo": 62, "match": [1, 5, 16, 19, 30, 67], "math": 34, "matter": [28, 47], "max": [0, 1, 2, 11, 66, 72], "max_num_chunk": 63, "max_user_mean_num_word": 1, "maxim": [34, 35, 37, 72], "maximum": [1, 63, 65, 72], "mayb": [38, 47], "mcfarland": 70, "me": [31, 32, 36, 41, 50, 53], "mean": [0, 1, 2, 4, 6, 11, 13, 21, 29, 31, 34, 36, 40, 41, 42, 47, 55, 56, 58, 61, 62, 65, 66, 72, 73], "mean_num_word": 1, "meaning": [31, 41, 55], "meaningless": 41, "meant": 39, "measur": [0, 1, 7, 12, 13, 20, 29, 30, 31, 32, 34, 35, 36, 37, 38, 39, 40, 41, 42, 44, 45, 46, 51, 52, 54, 55, 56, 57, 58, 59, 60, 62, 64, 68], "mechan": 32, "median": [0, 1, 72], "medium": 21, "meet": 48, "member": [13, 34, 37, 55], "merg": [2, 8, 65, 66], "merge_conv_data_with_origin": 2, "messag": [0, 1, 2, 3, 4, 5, 8, 11, 12, 13, 14, 15, 16, 17, 20, 21, 22, 23, 24, 28, 30, 31, 34, 35, 36, 37, 39, 41, 45, 46, 47, 48, 50, 51, 52, 55, 56, 57, 58, 61, 62, 63, 64, 65, 66, 67, 71, 73], "messaga": 61, "message_col": [0, 1, 2, 12, 13, 14, 61, 64, 65, 67, 71], "message_col_origin": 14, "message_embed": [6, 7, 8], "message_lower_with_punc": 71, "metadata": [0, 1], "method": [1, 5, 31, 41, 50, 62, 65], "metric": [0, 1, 8, 30, 34, 35, 46, 47, 48, 55, 66], "michael": 1, "mid": [1, 2, 71], "middl": [21, 34, 63], "might": [0, 1, 29, 43, 48, 53], "mikeyeoman": [18, 64], "mileston": 34, "millisecond": [0, 2], "mimic": [28, 31, 36, 45], "mimic_word": 28, "mimick": [28, 31, 64], "mimicri": [0, 1, 28, 31, 35, 36, 39, 61, 64], "mimicry_bert": [45, 46], "min": [1, 2, 11, 72], "mind": [1, 35, 50], "mine": [31, 36, 53, 59], "minim": [0, 41, 60], "minimum": [65, 72], "minmiz": 72, "minu": [12, 41, 64], "minut": [55, 58], "mirror": 1, "miss": [1, 32, 61, 71], "mitig": [31, 36], "mizil": [49, 50], "mm": [31, 36], "mnsc": 6, "modal": 50, "mode": 60, "model": [1, 13, 
15, 31, 34, 35, 36, 40, 45, 46, 47, 51, 62, 67], "modif": 35, "modifi": [1, 9, 19, 32, 64], "modul": [0, 1, 11, 34, 49, 50, 61, 69], "monologu": 59, "more": [0, 1, 2, 11, 12, 22, 23, 24, 28, 31, 32, 34, 36, 37, 40, 41, 42, 43, 44, 45, 46, 50, 52, 55, 59, 61, 62, 71], "morn": 1, "most": [1, 24, 31, 55, 62, 69], "motiv": 61, "move": [0, 1, 28, 31, 36, 39, 45, 59, 61], "movi": 31, "much": [1, 28, 31, 34, 35, 36, 45, 62], "multi": [1, 2, 71], "multidimension": [45, 46], "multipl": [0, 1, 2, 19, 62, 71], "must": [1, 6, 62, 71], "my": [30, 31, 35, 36, 45, 46, 50, 53], "my_chat_featur": 1, "my_feature_build": 61, "my_fil": 1, "my_output": 61, "my_output_chat_level": 61, "my_output_conv_level": 61, "my_output_user_level": 61, "my_pandas_datafram": 61, "myself": [31, 36, 53], "n": [0, 2, 35, 45, 46, 47, 57, 59, 60], "n_chat": 59, "na": [5, 33, 43, 44, 48, 49, 50, 53, 58], "naiv": [2, 20, 32, 34, 38, 39, 53, 56, 57, 64], "name": [0, 2, 4, 7, 8, 9, 12, 13, 14, 15, 17, 19, 23, 25, 28, 30, 32, 35, 39, 42, 45, 46, 50, 51, 56, 63, 64, 66, 67, 68, 71, 72, 73], "name_to_train": 47, "named_ent": [15, 47], "named_entity_recognition_featur": 11, "nan": [0, 34, 67], "nate": [35, 45, 46], "nathaniel": [35, 45, 46], "nativ": 50, "natur": [43, 55], "ndarrai": 68, "nearest": [13, 40], "nearli": 62, "necessari": [63, 67], "need": [0, 1, 2, 21, 62, 66, 67], "need_sent": 67, "need_senti": 67, "neg": [1, 24, 29, 31, 34, 35, 36, 42, 50, 51, 52, 54, 56, 61, 62, 67], "negat": [19, 49], "negative_bert": [0, 1, 51, 61], "negative_emot": [49, 51, 52, 56], "negoti": 62, "neighborhood": 54, "neither": 30, "ner": 15, "ner_cutoff": [0, 1, 2, 47, 64], "ner_train": 64, "ner_training_df": [0, 1, 2, 47], "nest": [0, 1, 2, 22, 71], "net": [45, 46], "network": 11, "neutral": [1, 5, 24, 30, 51, 55, 61, 67], "neutral_bert": [1, 51, 61], "never": 1, "new": [1, 4, 13, 34, 61, 64, 65, 66, 72], "new_column_nam": 72, "next": [1, 32, 47, 58], "nice": [1, 50, 54, 61], "nicknam": 1, "niculescu": [49, 50], "night": 31, 
"nikhil": [59, 62], "nltk": [1, 42, 61], "nobodi": [31, 36], "nois": 32, "non": [1, 2, 28, 31, 37, 42, 48, 61, 62, 67, 71], "none": [1, 2, 19, 23, 37, 55, 61, 64, 65, 66, 67, 71], "nor": 30, "normal": [19, 28, 31], "notabl": 62, "note": [0, 2, 12, 16, 20, 42, 61, 67, 71], "notebook": [0, 1], "noth": [31, 36, 56], "noun": 1, "novel": [45, 46], "now": [0, 1], "nowher": [31, 36], "np": [67, 68], "ntri": 32, "null": 34, "num": 48, "num_char": 65, "num_chunk": [27, 63], "num_hedge_word": 10, "num_messag": 65, "num_named_ent": [15, 47], "num_row": 63, "num_top": 13, "num_word": [12, 16, 65], "number": [0, 1, 3, 11, 12, 13, 14, 15, 16, 19, 20, 21, 22, 23, 25, 28, 31, 32, 34, 36, 37, 40, 41, 42, 43, 44, 47, 48, 49, 54, 56, 58, 59, 60, 62, 63, 64, 66, 67, 69, 71, 72], "numer": [0, 1, 2, 11, 13, 33, 68, 72, 73], "numpi": [1, 61, 68], "o": 35, "object": [1, 2, 11, 19, 44, 50, 57, 58, 61, 62, 64, 65, 66], "obtain": [0, 1, 13, 17, 23, 24, 34, 42, 61], "occur": [0, 4, 31, 42, 71], "occurr": 19, "off": [0, 1, 31, 36], "offer": 0, "offici": [61, 67], "often": [28, 36, 47, 48, 62], "oh": [31, 36, 48], "okai": [31, 36], "older": [1, 49, 61], "on_column": [18, 23, 28, 68, 72, 73], "onc": [1, 2, 11, 58, 61, 62, 67], "one": [0, 1, 2, 4, 10, 12, 19, 23, 25, 28, 29, 31, 32, 36, 37, 47, 51, 56, 59, 61, 62, 67, 68, 71, 73], "ones": [31, 36], "onli": [0, 1, 2, 5, 11, 23, 29, 31, 32, 34, 36, 37, 45, 53, 58, 59, 61, 62, 67, 71], "onlin": [1, 32, 39, 64], "onward": 0, "open": [0, 62, 66], "operation": [39, 50, 59], "opinion": [24, 31], "oppos": [2, 31, 34, 35, 55], "opposit": 34, "option": [1, 2, 37, 62, 63, 67, 71], "order": [0, 1, 35, 37, 42, 67, 71], "org": [2, 6, 15, 21, 24, 41, 70], "organ": 1, "origin": [1, 2, 5, 12, 21, 31, 32, 35, 36, 37, 45, 46, 49, 50, 59], "orthogon": 34, "other": [0, 1, 2, 9, 11, 28, 29, 30, 31, 32, 34, 35, 36, 37, 39, 40, 45, 46, 48, 51, 52, 54, 56, 58, 59, 61, 62, 64, 66, 71], "other_lexical_featur": [11, 64], "otherwis": [2, 10, 21, 23, 32, 38, 63, 67, 71], 
"our": [0, 1, 2, 11, 13, 29, 31, 32, 36, 37, 39, 53, 59, 61, 71], "ourselv": 53, "out": [1, 16, 19, 31, 36, 42, 55, 60, 62], "outcom": [1, 44, 62], "output": [0, 2, 10, 17, 19, 40, 42, 61, 62, 64, 67], "output_file_bas": [0, 1, 2, 42, 61], "output_file_path_chat_level": [1, 2], "output_file_path_conv_level": [1, 2], "output_file_path_user_level": [1, 2], "output_path": 67, "outsid": [1, 2, 12], "over": [1, 16, 29, 31, 34, 35, 36, 37, 53, 55, 60, 62, 67, 71], "overal": [30, 31, 34, 36, 45, 46], "overrid": [0, 1, 2], "overview": [0, 61, 62], "overwhelmingli": 1, "overwritten": 1, "own": [0, 1, 2, 9, 35, 62, 64], "p": 55, "pacakg": 24, "pace": [43, 62], "packag": [17, 18, 40, 62], "pad": 19, "page": [1, 11, 29, 39, 61, 62, 69], "pair": [6, 19, 34, 49, 71], "pairwis": [6, 34], "panda": [0, 1, 2, 12, 14, 16, 23, 47, 64, 65, 66, 71, 72, 73], "paper": [4, 5, 12, 18, 29, 40, 49, 50, 64], "paragraph": 22, "paramet": [2, 3, 4, 5, 6, 7, 8, 9, 10, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 28, 47, 61, 62, 63, 64, 65, 66, 67, 68, 71, 72, 73], "paramt": 1, "pardon": 32, "parenthes": [22, 42, 48, 64], "parenthet": [22, 48], "pars": [16, 50, 60], "parser": 67, "part": [1, 10, 13, 29, 36, 42, 52, 67, 71], "particip": [1, 9, 37, 62], "particl": [31, 36], "particular": [1, 11, 31, 32, 34, 41, 45, 47, 51, 59, 62], "particularli": 42, "partner": 32, "pass": [1, 13, 21, 47, 71], "path": [1, 2, 19, 61, 67], "path_in": 19, "pattern": [4, 11, 19, 42, 55, 62, 67], "paus": 4, "pd": [1, 2, 4, 6, 7, 8, 9, 12, 13, 14, 15, 16, 18, 19, 23, 25, 63, 64, 65, 66, 67, 68, 71], "pdf": [5, 12, 13, 16, 18, 21, 24, 64, 70], "penalti": 1, "pennebak": [0, 12, 37, 41, 42, 52], "pennyslvania": 62, "peopl": [1, 32, 59, 62], "per": [1, 6, 9, 19, 42, 63, 66, 72], "percentag": [2, 21], "perfect": [37, 59], "perform": [0, 1, 16, 50], "perhap": 1, "period": [4, 34, 55], "person": [1, 8, 12, 15, 16, 32, 34, 39, 41, 42, 50, 59, 62, 64, 70], "perspect": 1, "petrocelli": 5, "phrase": [19, 30, 38, 54], 
"phrase_split": 19, "pickl": [19, 67], "piec": [36, 42, 59, 63], "pl": 50, "place": [55, 61, 62], "plan": [34, 35, 45, 46], "player": 59, "pleas": [0, 1, 38, 49, 50, 61, 62], "please_start": 50, "point": [22, 24, 34, 35, 42, 45, 46, 48, 52, 64, 66], "poisson": 55, "polar": [24, 39, 51, 52, 64], "polit": [1, 17, 18, 30, 32, 38, 39, 42, 51, 52, 54, 56, 64], "politeness_featur": 11, "politeness_v2": 11, "politeness_v2_help": 11, "politenessstrategi": [17, 50], "portion": 0, "posit": [0, 1, 11, 15, 24, 29, 31, 39, 42, 50, 51, 54, 56, 61, 62, 64, 67], "positive_affect_lexical_per_100": [51, 52, 56], "positive_bert": [0, 1, 51, 61], "positive_emot": [49, 51, 52, 56], "positivity_bert": [1, 61], "positivity_zscor": 64, "positivity_zscore_chat": 52, "positivity_zscore_convers": 52, "possess": 31, "possibl": [1, 34, 62, 66], "possibli": [38, 62], "ppron": 67, "practic": [34, 35], "pre": [1, 4, 21, 37, 49, 64], "preced": [31, 35, 71], "precend": 35, "precis": 47, "precomput": 51, "predefin": 19, "predetermin": [31, 36], "predict": [2, 47, 51, 64], "prefer": [0, 1], "preload_word_list": 69, "prep_simpl": 19, "prep_whol": 19, "preposit": [31, 36], "preproces": 48, "preprocess": [0, 1, 2, 13, 19, 40, 43, 49, 51, 61, 69], "preprocess_chat_data": 2, "preprocess_conversation_column": 71, "preprocess_naive_turn": 71, "preprocess_text": 71, "preprocess_text_lowercase_but_retain_punctu": 71, "presenc": [2, 32, 67], "present": [1, 2, 14, 30, 31, 38, 42, 55, 62, 71], "preserv": 42, "prespecifi": 19, "prevent": 51, "previou": [1, 7, 28, 31, 36, 45, 46, 58, 64, 71], "primari": 34, "print": [2, 71], "prior": [2, 64, 71], "priya": [47, 62], "probabl": [15, 47], "problem": 62, "procedur": 62, "proceed": 46, "process": [0, 1, 2, 4, 10, 21, 37, 42, 55, 62, 64, 65, 67, 69, 71], "prodi": 15, "produc": [1, 2, 34], "product": 15, "professor": 62, "progress": [1, 2], "project": [54, 62], "pronoun": [12, 16, 31, 36, 39, 41, 42, 64, 67, 70], "proper": 1, "properli": 42, "properti": [1, 11, 61], 
"proport": [16, 39, 42, 64], "propos": 37, "provid": [0, 1, 2, 15, 29, 30, 33, 36, 39, 44, 47, 54, 62], "proxi": 42, "pseudonym": 1, "psycholog": 42, "pub": 70, "publish": [5, 30, 64], "pubsonlin": 6, "punctuat": [0, 2, 16, 19, 20, 21, 28, 43, 54, 60, 67, 71], "punctuation_seper": 19, "puncut": 48, "pure": [24, 36], "purpos": 1, "put": [34, 42, 50, 62, 66], "py": [0, 1, 14, 49, 61, 67], "pydata": 2, "pypi": [1, 61], "python": [1, 32, 41, 56, 57, 61, 62, 68], "qtd": 62, "qualiti": 41, "quantifi": [31, 36, 62], "quantiti": [37, 39, 41, 47], "quartil": 50, "question": [16, 19, 20, 29, 32, 39, 49, 50, 64, 66, 68, 70], "question_num": 11, "question_word": 20, "quick": [1, 43], "quickli": 0, "quit": 40, "quot": [22, 48, 64], "quotat": [22, 48], "rabbit": 62, "rain": 41, "rais": [67, 71], "random": 55, "rang": [5, 8, 24, 30, 33, 34, 35, 40, 51, 53, 55, 56, 57], "ranganath": [16, 31, 32, 36, 38, 43, 54, 70], "ranganath2013": 70, "ranganathetal2013_detectingflirt": 16, "rapid": [1, 4], "rare": [34, 35], "rate": [42, 51], "rather": [1, 31, 34, 35, 36, 37, 45, 46, 63], "ratio": [16, 39, 64], "raw": [0, 12, 16, 21, 31, 33, 42, 50, 64], "re": [1, 31, 36, 42, 50, 61], "reach": 42, "read": [0, 1, 2, 16, 21, 29, 33, 61, 62, 64, 65, 66, 67], "read_csv": 1, "read_in_lexicon": 67, "readabl": [11, 33, 64, 70], "reader": 33, "readi": 1, "readili": 62, "readthedoc": [1, 24, 61], "real": [1, 55], "realit": 13, "realli": [31, 36, 50], "reason": [31, 36, 45, 46, 49], "reassur": 49, "recal": 47, "recent": [0, 50], "recept": [18, 32, 39, 42, 50, 51, 52, 54, 56, 62, 64], "recogn": [1, 42, 43, 47], "recognit": [0, 2, 39, 64], "recommend": [0, 42, 62], "reddit": [48, 64], "reddit_tag": 11, "redditus": 48, "reduc": 63, "reduce_chunk": 63, "redund": [42, 62], "refer": [0, 1, 2, 11, 22, 24, 28, 31, 42, 48, 52, 61, 62, 64, 70], "reflect": [37, 43], "regardless": 1, "regener": [0, 2, 51, 67], "regenerate_vector": [0, 1, 2, 67], "regex": [14, 16, 42, 49, 67], "regist": 37, "regress": 1, "regular": 
[5, 14, 30, 32, 42, 55, 58, 67], "reichel": [53, 58, 60], "reidl": [4, 13], "reinvent": 62, "rel": [41, 51, 52, 55, 60, 64], "relat": [1, 61, 62, 64], "relationship": 36, "relev": [1, 29, 42, 44, 49, 51, 56, 61, 64, 65], "reli": [31, 34, 35, 36, 69], "reliabl": [33, 42], "remain": [1, 30, 71], "rememb": 1, "remov": [0, 2, 9, 13, 19, 28, 40, 43, 48, 49, 50, 71], "remove_active_us": 9, "remove_unhashable_col": 71, "renam": 1, "repair": [16, 39], "repeat": [60, 71], "repetit": 60, "replac": 19, "report": [1, 61], "repres": [2, 4, 6, 7, 11, 13, 23, 31, 34, 36, 42, 45, 46, 64, 66, 67, 68, 71, 72, 73], "represent": [34, 38, 67], "reproduc": [36, 62], "republican": 1, "request": [32, 50, 51], "requir": [0, 1, 20, 21, 31, 55, 61, 62, 64, 65, 66, 67, 71], "research": [1, 62], "reserv": 0, "resolv": 62, "resourc": [1, 39, 48, 61, 62], "respect": [1, 2, 12, 31, 36, 37, 69], "respons": [22, 48, 55, 58, 64], "restaur": [34, 56], "restor": 0, "restrict": 71, "result": [40, 55, 65, 72], "retain": [2, 16, 20, 21, 60, 71], "retriev": 50, "retunr": 3, "return": [2, 3, 4, 5, 6, 7, 8, 9, 10, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 28, 30, 32, 43, 49, 50, 51, 55, 62, 63, 64, 65, 66, 67, 68, 70, 71, 72, 73], "reveal": 62, "revert": 50, "review": 62, "rewrit": 50, "rich": 62, "riedl": [13, 40, 55], "right": [31, 36, 61, 62], "roberta": [1, 39, 42, 52, 56, 61, 64, 67], "robust": 13, "rocklag": [5, 30, 64], "room": 59, "root": [13, 40], "rough": [12, 54], "roughli": 31, "round": [13, 40, 59, 71], "round_num": 1, "row": [0, 1, 2, 9, 13, 25, 37, 40, 59, 63, 68, 71, 72, 73], "rowbotham": 62, "rucker": 5, "rule": [1, 69], "run": [0, 10, 12, 16, 35, 46, 47, 48, 51, 61, 69], "runtim": [1, 35], "ryan": [0, 42], "ryanboyd": 67, "sagepub": [5, 64], "sai": [1, 32, 50, 59], "said": [1, 36, 62], "same": [0, 1, 2, 31, 34, 37, 45, 48, 52, 59, 60, 62, 71], "sampl": [61, 62], "sarcast": 48, "save": [0, 1, 2, 19, 64, 67], "save_featur": 2, "sbert": [1, 28, 31, 34, 35, 36, 45, 46, 64, 65, 
67], "scale": [42, 51], "schema": 1, "scheme": 0, "school": [21, 62], "scienc": [29, 39, 62], "scientist": [61, 62], "score": [1, 4, 5, 11, 12, 13, 15, 21, 24, 28, 29, 30, 31, 34, 35, 36, 38, 39, 40, 45, 46, 47, 50, 51, 53, 56, 57, 61, 64, 65, 67, 73], "script": [1, 61], "sea": 1, "seamless": 62, "search": [19, 61], "second": [0, 1, 4, 34, 42, 58, 59], "second_person": 49, "secr": [18, 49, 50, 64], "section": [1, 29, 61], "see": [0, 1, 2, 11, 30, 34, 38, 41, 45, 46, 47, 55, 62, 71], "seek": [5, 62], "seen": 67, "segment": [0, 19], "select": [2, 4, 23, 28, 36, 45, 64, 66, 67, 68, 71, 72, 73], "self": [1, 2, 61], "semant": [31, 34, 35, 41], "semantic_group": [1, 61], "send": [1, 37, 55], "sens": [1, 5, 31, 54, 66], "sensibl": 1, "sent": [1, 37, 64], "sentenc": [0, 1, 10, 15, 19, 20, 21, 33, 34, 35, 36, 42, 45, 46, 47, 48, 54, 56, 61, 67], "sentence_pad": 19, "sentence_split": 19, "sentence_to_train": 47, "sentencis": 19, "sentiment": [0, 1, 24, 31, 39, 42, 52, 56, 61, 62, 64, 67], "separ": [1, 2, 19, 34, 42, 51, 67], "sepcifi": 1, "septemb": 40, "sequenc": [1, 59], "sequenti": 1, "seri": [12, 16, 23, 28, 42, 71, 73], "serv": 12, "set": [0, 1, 2, 13, 23, 34, 48, 59], "set_self_conv_data": 2, "sever": [1, 30, 41, 42, 48, 51, 56, 61], "shall": 54, "share": [31, 36, 37], "she": [30, 31, 36], "shift": 34, "shop": 62, "short": [55, 58], "shorter": [13, 40, 41, 42, 43], "should": [0, 1, 2, 4, 14, 23, 28, 29, 31, 36, 47, 48, 54, 61, 62, 64, 66, 67, 68, 69, 71, 72, 73], "shouldn": [31, 36], "show": [1, 37, 61], "showeth": 62, "shruti": [35, 45, 46, 47, 62], "side": 31, "signal": [45, 55], "signifi": 42, "signific": [1, 61], "silent": 37, "similar": [1, 6, 7, 13, 28, 29, 31, 34, 35, 36, 40, 45, 46, 49, 50, 62, 65], "similarli": [1, 35], "simpl": [0, 1, 16, 19, 42, 61, 62], "simpli": [1, 5, 11, 28, 42, 56, 62], "simplifi": 1, "simplist": 41, "sinc": [1, 32, 41, 71], "singh": 62, "singl": [0, 1, 2, 11, 12, 19, 23, 31, 34, 35, 36, 37, 41, 45, 46, 59, 62, 71, 72], "singular": [12, 
41, 64], "site": 16, "situat": 37, "size": [1, 13, 63, 67], "skip": 1, "slightli": [32, 62, 63], "slow": 1, "small": 40, "so": [1, 2, 10, 30, 31, 36, 37, 42, 50, 61, 62, 66, 67], "social": [29, 39, 61, 62], "socsci": 16, "softwar": 62, "sohi": 62, "sol3": 4, "solut": [1, 59], "solv": 62, "some": [0, 1, 11, 17, 29, 32, 34, 35, 37, 41, 61, 63], "somebodi": [31, 36], "someon": [22, 29, 31, 36, 47, 48, 61, 64], "someplac": [31, 36], "someth": 47, "sometim": 1, "somewhat": 35, "soon": 62, "sorri": [16, 32, 50], "sort": [10, 42, 67], "sort_word": 67, "sound": [47, 51], "sourc": [4, 5, 6, 12, 13, 16, 17, 21, 34, 35, 50, 64, 68], "space": [34, 40, 42, 67, 71], "spaci": [1, 19, 47, 49, 50, 61], "span": 63, "spars": 32, "speak": [1, 31, 36, 37, 59, 60, 62], "speaker": [0, 1, 2, 6, 8, 9, 25, 31, 34, 35, 37, 38, 42, 45, 46, 61, 66, 71, 72], "speaker_id": [2, 61, 72], "speaker_id_col": [0, 1, 2, 6, 8, 9, 25, 26, 27, 61, 65, 66, 71, 72], "speaker_nicknam": [0, 1, 2, 6, 9, 59, 66], "special": [0, 1, 2, 48, 71], "specif": [1, 2, 12, 32, 41, 48, 55, 61, 62, 69, 71], "specifi": [1, 2, 19, 47, 49, 65, 66, 67, 68, 71, 72, 73], "speciifc": 63, "spend": [51, 62], "spike": 55, "split": [19, 21, 43, 63], "spoke": 59, "spoken": [11, 37], "spread": 55, "squar": [13, 40], "src": 67, "ssrn": 4, "stabl": 40, "stack": 14, "stackoverflow": 68, "stage": [1, 2, 34, 71], "stamp": 55, "standard": [1, 4, 37, 40, 41, 42, 49, 55, 58, 60, 65, 72, 73], "stanford": 70, "start": [15, 19, 20, 22, 23, 50], "statement": [1, 38, 42, 47, 48, 61, 62, 64], "statist": [1, 65, 66, 68], "statologi": 41, "stdev": [1, 2, 11, 65, 66], "stem": 42, "step": [1, 4, 28, 41, 45, 46, 51], "still": [1, 41, 45, 46], "stochast": 40, "stop": [40, 62], "stopword": [13, 19], "store": [1, 12, 16, 41, 49, 51, 61, 65, 67], "stoword": 42, "str": [2, 3, 4, 5, 7, 8, 9, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 28, 63, 64, 65, 66, 67, 68, 71, 72, 73], "str_to_vec": 67, "str_vec": 67, "straightforward": 29, "strategi": [17, 
30, 32, 38, 39, 42, 49, 54, 64], "stream": 35, "strictli": 1, "string": [0, 1, 2, 4, 8, 12, 13, 14, 19, 23, 24, 50, 64, 66, 67, 68, 71, 72, 73], "strongli": [1, 41, 61], "structur": [0, 36, 49], "student": [21, 33], "studi": [1, 34, 62], "style": [1, 31, 36, 59], "sub": [0, 1, 71], "subfold": 1, "subject": [5, 24, 28, 39, 49, 64], "subjunct": 50, "sublist": 28, "submiss": 55, "subpart": [1, 71], "subsequ": [1, 30, 51, 58], "subset": 62, "substanc": 36, "substant": 31, "substanti": 1, "substr": 30, "subtask": 1, "subtract": [41, 58], "succe": 62, "success": [0, 1, 4, 31, 36, 43, 55, 58], "suggest": [1, 13, 34, 42, 44, 50], "suit": [62, 64], "sum": [1, 28, 34, 61, 64, 65, 66, 72], "summar": [0, 1, 69], "summari": [65, 66, 72], "summariz": [0, 65], "summarize_featur": 69, "suppl": 6, "support": [1, 15, 42, 61], "suppos": 1, "sure": 30, "swear": 49, "symbol": 67, "syntax": [1, 32, 61], "system": [2, 59, 64], "t": [0, 1, 15, 29, 31, 36, 42, 45, 49, 54, 61, 62, 67], "tabl": [1, 62], "tag": 39, "take": [1, 4, 5, 9, 14, 25, 29, 31, 34, 37, 39, 42, 55, 61, 65, 67, 71], "taken": [59, 71], "talk": [1, 37, 47, 59, 62], "tandem": [1, 61], "target": 15, "task": [1, 2, 59, 71], "tausczik": [12, 37, 41, 52], "tausczikpennebaker2013": 12, "team": [0, 1, 4, 11, 12, 13, 34, 39, 40, 42, 59, 65], "team_bursti": 4, "team_comm_tool": [1, 61], "teamcommtool": 1, "technic": [29, 39, 61, 62], "teghxgbqdhgaaaaa": 5, "tempor": [0, 2, 55, 58, 64, 71], "temporal_featur": 11, "tend": [1, 34, 60], "term": [1, 28, 59, 67], "termin": [1, 2, 61], "terribl": 51, "test": [13, 33, 47], "text": [0, 1, 2, 3, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 24, 28, 32, 33, 36, 42, 48, 55, 62, 64, 67, 71], "text_based_featur": 64, "textblob": [24, 39, 51, 52, 64], "textblob_sentiment_analysi": 11, "than": [0, 1, 2, 11, 13, 31, 34, 35, 36, 37, 40, 41, 45, 46, 54, 60, 62, 63], "thee": 62, "thei": [0, 1, 11, 28, 29, 31, 34, 36, 37, 39, 42, 47, 58, 59, 61, 62, 67], "them": [0, 1, 2, 19, 28, 29, 31, 36, 50, 51, 55, 
59, 61, 62, 64, 65, 66, 67], "themselv": [31, 36, 60], "theoret": 35, "theori": [34, 50], "therebi": 0, "therefor": [0, 1, 11, 28, 37, 45, 59, 62, 69], "thi": [0, 1, 2, 3, 4, 6, 7, 8, 9, 10, 11, 12, 13, 14, 16, 18, 20, 21, 23, 28, 29, 30, 31, 32, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 71, 72, 73], "thing": [48, 61], "think": [1, 38, 47], "thorough": [43, 62], "those": [1, 21, 31, 36, 61, 71], "though": [34, 42, 50], "thought": [1, 35, 45], "thread": [1, 61], "three": [0, 1, 2, 22, 34, 37, 40, 51, 61, 62, 69, 71], "threshold": [15, 47], "through": [1, 45, 46, 50, 61, 62], "throughout": [31, 35, 36, 40, 45, 46, 55, 63], "tht": 35, "thu": [1, 34, 35, 36, 37, 46, 55, 71], "time": [0, 1, 4, 23, 34, 35, 39, 42, 48, 51, 55, 59, 61, 62, 63, 64, 65, 66, 71], "time_diff": 55, "timediff": 4, "timestamp": [0, 1, 2, 8, 23, 58, 61, 62, 63, 64, 71], "timestamp_col": [0, 1, 2, 8, 61, 63, 64, 65, 71], "timestamp_end": [1, 23, 61, 64], "timestamp_start": [1, 23, 61, 64], "timestamp_unit": [0, 2, 23, 64], "to_datetim": [0, 2], "todai": [34, 35, 41, 43, 45, 46, 47], "todo": 66, "togeth": [0, 62, 66], "token": [16, 19, 39, 49, 54, 64, 67], "token_count": [19, 49], "too": [30, 31, 36, 62], "took": [1, 59], "tool": [1, 61, 62], "toolkit": [0, 1, 11, 42, 45, 46, 55, 62, 65, 66], "top": [1, 50, 59], "topic": [1, 13, 31, 34, 40, 42, 43, 65], "tormala": 5, "total": [0, 1, 3, 12, 16, 25, 31, 34, 36, 37, 41, 44, 53, 59, 60, 61, 62, 63, 64, 66, 72], "touch": [1, 61], "toward": [31, 36, 38, 42, 45, 46], "track": [65, 66], "tradit": 49, "train": [1, 2, 15, 64], "train_spacy_n": 15, "transcript": 0, "transfom": [45, 46], "transform": [1, 31, 34, 35, 36, 51], "transform_utter": 50, "treat": [0, 1, 42, 59, 61], "tri": [50, 64], "trivial": [3, 44, 62], "troubl": [1, 61], "true": [0, 1, 2, 37, 61, 63, 65, 66, 67, 71], "truncat": 2, "truth_intensifi": 49, "ttr": 64, "tupl": [0, 1, 2, 15, 19, 64], "turn": [0, 2, 
25, 28, 31, 32, 37, 39, 61, 64, 65, 71], "turn_count": 59, "turn_df": 71, "turn_id": 71, "turn_taking_featur": 11, "twice": 63, "twitter": [1, 51, 61], "two": [0, 1, 2, 23, 31, 34, 36, 41, 45, 46, 52, 62, 63, 67], "txt": 19, "type": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 28, 37, 39, 52, 63, 64, 65, 66, 67, 68, 70, 71, 72, 73], "typic": [1, 34, 40, 41, 42, 52, 60], "u": [0, 1, 2, 22, 31, 36, 48, 49, 58], "uci": 16, "uh": [31, 36], "ulrich": 55, "um": [31, 36, 60], "umbrella": [8, 29, 34], "uncertain": [5, 30], "uncertainti": 30, "under": [0, 1, 10, 11, 12, 28, 40], "underli": [1, 61], "underscor": [1, 42, 61], "understand": [0, 33, 39, 43, 48, 58, 61, 62], "understood": 33, "unhash": 71, "uninterrupt": 59, "uniqu": [0, 1, 2, 6, 9, 13, 16, 23, 25, 41, 47, 52, 60, 61, 63, 71], "unit": [0, 2, 23], "univers": 62, "unix": 58, "unless": [31, 36], "unpack": 62, "unpreprocess": 0, "until": [31, 36, 45, 46], "unzip": [1, 61], "up": [1, 17, 21, 28, 31, 35, 36, 37, 42, 45, 46, 51, 59, 61, 67], "updat": [1, 9, 40, 54, 61], "upenn": 1, "upgrad": 50, "upload": 13, "upon": 33, "us": [0, 1, 2, 3, 5, 11, 12, 13, 17, 19, 24, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 40, 41, 42, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 56, 57, 60, 62, 64, 65, 66, 67, 71], "usag": [0, 21, 24], "use_time_if_poss": 63, "user": [0, 1, 2, 9, 14, 15, 22, 37, 42, 47, 48, 51, 61, 62, 63, 64, 65, 66, 69, 72], "user_aggreg": [0, 1, 2, 65, 66], "user_column": [0, 1, 2, 65, 66], "user_data": [2, 65, 66], "user_df": 9, "user_level_featur": 2, "user_list": 9, "user_method": [0, 1, 2, 65, 66], "userlevelfeaturescalcul": [2, 66, 69], "usernam": [22, 48], "utf": 1, "util": [1, 12, 21, 61, 62, 63, 64, 65, 66, 67, 68, 70, 71, 72, 73], "utilti": 62, "utter": [0, 1, 2, 3, 4, 5, 13, 14, 15, 16, 17, 20, 21, 23, 24, 30, 31, 32, 34, 35, 36, 37, 38, 40, 41, 42, 43, 44, 45, 46, 50, 51, 52, 54, 58, 60, 61, 67], "v": [0, 1, 13, 61], "v0": 0, "valenc": 51, "valid": [1, 23, 55, 67, 
71], "valu": [0, 1, 2, 5, 6, 10, 12, 13, 18, 19, 23, 28, 30, 31, 34, 36, 37, 40, 41, 42, 45, 46, 47, 55, 59, 61, 64, 67, 68, 71, 72, 73], "valueerror": 71, "vari": [13, 31, 34, 35], "variabl": [1, 56, 57, 64, 65, 66], "varianc": [1, 8, 34], "variance_in_dd": 11, "variat": [4, 32], "varieti": [42, 62], "variou": [19, 42, 64, 65, 66], "vast": 62, "ve": [0, 31, 36, 50, 61], "vec": 6, "vect_data": [1, 7, 8, 28, 61, 64, 65, 66], "vect_path": 67, "vector": [0, 2, 6, 7, 8, 13, 28, 34, 35, 40, 55, 61, 64, 65, 67], "vector_data": [0, 1, 2, 61], "vector_directori": [0, 1, 2, 61, 65], "vein": 45, "verb": [19, 31, 36], "verbal": 32, "veri": [5, 28, 30, 31, 34, 35, 36, 42, 49, 54], "verifi": 2, "verit": 62, "version": [0, 1, 12, 14, 21, 28, 31, 40, 42, 50, 51, 61], "versu": [4, 29, 47, 55, 59], "via": [3, 44], "view": 50, "visit": 41, "voila": 62, "w": [31, 42], "wa": [0, 1, 2, 5, 12, 31, 32, 35, 36, 47, 51, 56, 59, 62, 71], "wai": [0, 1, 2, 29, 30, 31, 32, 34, 49, 50, 54, 56, 57, 61, 62, 66], "waiai": 62, "wait": [4, 55], "walk": 1, "walkthrough": [0, 61, 62], "want": [1, 28, 34, 59, 61, 62, 65, 66, 67], "warn": [1, 50, 71], "watt": [1, 2, 62, 71], "we": [0, 1, 2, 3, 4, 5, 9, 10, 11, 12, 15, 16, 18, 23, 24, 28, 29, 30, 31, 34, 35, 36, 37, 40, 41, 42, 44, 45, 46, 47, 48, 49, 50, 52, 53, 55, 56, 57, 58, 59, 61, 62, 66, 67, 71], "web": 70, "websit": [1, 61], "week": 47, "weight": 66, "weigt": 31, "welcom": 61, "well": [11, 29, 31, 36, 55, 62], "went": 41, "were": [1, 2, 12, 31, 36, 42, 71], "western": 1, "wh": [19, 31, 36], "wh_question": [32, 49, 54], "wharton": 62, "what": [1, 2, 12, 16, 20, 29, 31, 32, 34, 35, 36, 39, 41, 45, 46, 47, 50, 54, 62, 63], "whatev": [1, 31, 36], "wheel": 62, "when": [1, 16, 20, 31, 33, 36, 42, 47, 54, 55, 59, 60, 61, 62, 67, 69, 71], "whenev": 71, "where": [1, 2, 19, 20, 28, 31, 32, 36, 37, 40, 41, 42, 48, 50, 51, 54, 59, 61, 65, 68, 73], "wherea": [31, 34, 35, 36, 43], "wherev": [31, 36], "whether": [1, 2, 10, 16, 19, 32, 37, 38, 41, 43, 47, 57, 
58, 62, 63, 64, 67, 71], "which": [0, 1, 2, 3, 4, 5, 7, 9, 12, 13, 15, 16, 18, 23, 25, 28, 31, 34, 35, 36, 37, 38, 40, 41, 42, 51, 53, 54, 55, 56, 57, 58, 59, 61, 62, 64, 65, 66, 67, 68, 69, 71, 72, 73], "while": [1, 31, 32, 34, 36, 37, 44, 45, 46, 55, 62, 71], "whitespac": 43, "who": [1, 20, 31, 32, 36, 47, 51, 54, 59, 60, 62], "whole": [28, 42, 59, 62, 71], "whom": [31, 36, 54], "whose": [31, 36, 54], "why": [20, 29, 31, 36, 54], "wide": 31, "wien": 62, "wiki": [21, 29, 70], "wiki_link": [1, 61], "wikipedia": [21, 33, 37, 70], "williamson": 60, "wish": [1, 2, 18, 28], "within": [0, 1, 2, 8, 11, 16, 28, 30, 31, 34, 35, 36, 41, 45, 46, 52, 55, 59, 60, 62, 63, 64, 68, 71, 73], "within_group": 2, "within_person_discursive_rang": 11, "within_task": [0, 1, 2, 71], "without": [1, 19, 31, 36, 42, 47, 54, 62, 69], "won": [0, 31, 36, 45], "wonder": 56, "woolei": 4, "woollei": [13, 40, 55], "wooten": 55, "word": [0, 1, 3, 10, 11, 12, 13, 14, 16, 19, 20, 21, 22, 28, 30, 32, 33, 37, 38, 39, 40, 41, 43, 45, 46, 48, 49, 52, 53, 54, 56, 57, 62, 64, 65, 66, 67, 69, 70], "word_mimicri": 11, "word_start": [19, 49], "wordcount": [1, 42], "wordnet": [1, 61], "words_in_lin": 19, "work": [0, 11, 42, 47, 50, 55, 61, 62], "world": 55, "worri": 62, "would": [1, 29, 31, 34, 35, 36, 37, 42, 50, 54, 62], "wouldn": [31, 36], "wow": 50, "wp": 13, "wrap": 42, "write": [2, 29, 60], "www": [12, 13, 18, 41, 64], "x": [0, 1, 2, 4, 46, 68], "xinlan": 62, "yashveer": 62, "ye": 19, "yeah": [31, 36], "yeoman": [18, 49, 50], "yesno_quest": [32, 49, 54], "yet": 48, "ylatau": 12, "you": [0, 1, 2, 11, 24, 29, 31, 36, 37, 42, 43, 47, 50, 59, 61, 62, 69], "your": [0, 29, 31, 32, 36, 37, 50, 59, 61, 62], "your_data": 42, "yourself": [31, 36, 50], "yuluan": 62, "yup": [31, 36], "yuxuan": 62, "z": [12, 39, 49, 51, 64, 73], "z0": 67, "za": 67, "zero": [13, 52], "zhang": 62, "zheng": 62, "zhong": 62, "zhou": 62, "zscore": 41, "zscore_chat": 41, "zscore_chats_and_convers": 69, "zscore_convers": 41, "\u00bc": 47, 
"\u03c4": 55}, "titles": ["The Basics (Get Started Here!)", "Worked Example", "feature_builder module", "basic_features module", "burstiness module", "certainty module", "discursive_diversity module", "fflow module", "get_all_DD_features module", "get_user_network module", "hedge module", "Features: Technical Documentation", "info_exchange_zscore module", "information_diversity module", "lexical_features_v2 module", "named_entity_recognition_features module", "other_lexical_features module", "politeness_features module", "politeness_v2 module", "politeness_v2_helper module", "question_num module", "readability module", "reddit_tags module", "temporal_features module", "textblob_sentiment_analysis module", "turn_taking_features module", "variance_in_DD module", "within_person_discursive_range module", "word_mimicry module", "FEATURE NAME", "Certainty", "Content Word Accommodation", "Conversational Repair", "Dale-Chall Score", "Discursive Diversity", "Forward Flow", "Function Word Accommodation", "Gini Coefficient", "Hedge", "Features: Conceptual Documentation", "Information Diversity", "Information Exchange", "Linguistic Inquiry and Word Count (LIWC) and Other Lexicons", "Message Length", "Message Quantity", "Mimicry (BERT)", "Moving Mimicry", "Named Entity Recognition", "Online Discussion Tags", "Politeness/Receptiveness Markers", "Politeness Strategies", "Sentiment (RoBERTa)", "Positivity Z-Score", "Proportion of First Person Pronouns", "Question (Naive)", "Team Burstiness", "Textblob Polarity", "Textblob Subjectivity", "Time Difference", "Turn Taking Index", "Word Type-Token Ratio", "The Team Communication Toolkit", "Introduction", "assign_chunk_nums module", "calculate_chat_level_features module", "calculate_conversation_level_features module", "calculate_user_level_features module", "check_embeddings module", "gini_coefficient module", "Utilities", "preload_word_lists module", "preprocess module", "summarize_features module", "zscore_chats_and_conversation 
module"], "titleterms": {"0": 42, "1": 42, "5": 42, "A": 0, "One": 0, "The": [0, 61, 62], "accommod": [31, 36], "addit": 1, "advanc": 1, "aggreg": [1, 11], "analyz": 1, "assign_chunk_num": 63, "assumpt": 0, "base": 11, "basic": [0, 1, 29, 30, 31, 33, 34, 35, 36, 37, 38, 40, 41, 43, 44, 45, 46, 47, 48, 49, 50, 51, 53, 55, 56, 57, 58, 59, 60], "basic_featur": 3, "bert": 45, "bring": 42, "bursti": [4, 55], "cach": 1, "calculate_chat_level_featur": 64, "calculate_conversation_level_featur": 65, "calculate_user_level_featur": 66, "caveat": [1, 29, 30, 31, 33, 34, 35, 36, 38, 40, 41, 43, 44, 45, 46, 47, 48, 49, 50, 51, 53, 55, 56, 57, 58, 59], "certainti": [5, 30], "chall": 33, "chat": [11, 39], "check_embed": 67, "citat": [29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60], "class": 69, "code": [0, 1], "coeffici": 37, "column": [1, 61], "commun": 61, "conceptu": 39, "configur": 1, "consider": 1, "content": [31, 61], "convers": [1, 11, 32, 39, 62, 69], "count": [42, 59], "cumul": 1, "custom": [1, 42], "customiz": 0, "dale": 33, "data": 1, "declar": 61, "demo": [0, 1], "detail": 1, "differ": 58, "directori": 1, "discurs": 34, "discursive_divers": 6, "discuss": 48, "divers": [34, 40], "document": [11, 39, 62], "driver": 69, "entiti": [1, 47], "environ": [1, 61], "exampl": [1, 41, 47], "exchang": 41, "featur": [1, 11, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 69], "feature_build": 2, "featurebuild": [1, 61, 62], "fflow": 7, "file": [1, 30, 34, 35, 45, 46, 47, 51], "first": [1, 53], "flow": 35, "forward": 35, "function": [0, 36], "gener": [1, 61, 62], "get": [0, 1, 61, 62], "get_all_dd_featur": 8, "get_user_network": 9, "gini": 37, "gini_coeffici": 68, "group": 1, "hedg": [10, 38], "here": 0, "high": [29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 
58, 59, 60], "implement": [29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60], "import": [1, 61], "index": 59, "indic": 61, "info_exchange_zscor": 12, "inform": [1, 40, 41, 61], "information_divers": 13, "input": [1, 34], "inquiri": 42, "inspect": [1, 61], "interpret": [29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60], "introduct": 62, "intuit": [29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60], "kei": 0, "length": 43, "level": [11, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 62, 69], "lexical_features_v2": 14, "lexicon": 42, "light": 0, "linguist": 42, "liwc": 42, "marker": 49, "messag": [43, 44], "mimicri": [45, 46], "modul": [2, 3, 4, 5, 6, 7, 8, 9, 10, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 63, 64, 65, 66, 67, 68, 70, 71, 72, 73], "motiv": 62, "move": 46, "naiv": 54, "name": [1, 29, 47, 61], "named_entity_recognition_featur": 15, "new": 42, "note": [1, 29, 30, 31, 33, 34, 35, 36, 38, 40, 41, 43, 44, 45, 46, 47, 48, 49, 50, 51, 53, 55, 56, 57, 58, 59], "onlin": 48, "other": [42, 69], "other_lexical_featur": 16, "ouput": 34, "our": 62, "output": [1, 30, 35, 45, 46, 47, 51], "overview": 1, "own": 42, "packag": [0, 1, 61], "paramet": [0, 1], "percentag": 1, "person": 53, "pip": [1, 61], "polar": 56, "polit": [49, 50], "politeness_featur": 17, "politeness_v2": 18, "politeness_v2_help": 19, "posit": 52, "preload_word_list": 70, "preprocess": 71, "pronoun": 53, "proport": 53, "quantiti": 44, "question": 54, "question_num": 20, "ratio": 60, "readabl": 21, "recept": 49, "recognit": [1, 47], "recommend": [1, 61], "reddit_tag": 22, "regener": 1, "relat": [29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 
52, 53, 54, 55, 56, 57, 58, 59, 60], "repair": 32, "roberta": 51, "run": 1, "sampl": [0, 1], "score": [33, 41, 52], "sentiment": 51, "speaker": [11, 59, 62, 69], "start": [0, 1, 61, 62], "strategi": 50, "subject": 57, "summarize_featur": 72, "tabl": 61, "tag": 48, "take": 59, "team": [55, 61, 62], "technic": 11, "temporal_featur": 23, "textblob": [56, 57], "textblob_sentiment_analysi": 24, "time": 58, "token": 60, "toolkit": 61, "touch": 0, "train": 47, "troubleshoot": [1, 61], "turn": [1, 59], "turn_taking_featur": 25, "type": 60, "us": 61, "usag": 1, "user": 11, "util": 69, "utter": [11, 39, 62, 69], "v": 42, "variance_in_dd": 26, "vector": 1, "virtual": [1, 61], "walkthrough": 1, "within_person_discursive_rang": 27, "word": [31, 36, 42, 60], "word_mimicri": 28, "work": 1, "your": [1, 42], "z": [41, 52], "zscore_chats_and_convers": 73}})
\ No newline at end of file
+Search.setIndex({"alltitles": {"A Light-Touch, One-Function Package": [[0, "a-light-touch-one-function-package"]], "Additional FeatureBuilder Considerations": [[1, "additional-featurebuilder-considerations"]], "Advanced Configuration Columns": [[1, "advanced-configuration-columns"]], "Aggregation Overview": [[1, "id2"]], "Analyzing First Percentage (%)": [[1, "analyzing-first-percentage"]], "Base Conversation-Level Features": [[11, "base-conversation-level-features"]], "Basic Input Columns": [[1, "basic-input-columns"]], "Certainty": [[30, null]], "Citation": [[29, "citation"], [30, "citation"], [31, "citation"], [32, "citation"], [33, "citation"], [34, "citation"], [35, "citation"], [36, "citation"], [37, "citation"], [38, "citation"], [40, "citation"], [41, "citation"], [42, "citation"], [43, "citation"], [44, "citation"], [45, "citation"], [46, "citation"], [47, "citation"], [48, "citation"], [49, "citation"], [50, "citation"], [51, "citation"], [52, "citation"], [53, "citation"], [54, "citation"], [55, "citation"], [56, "citation"], [57, "citation"], [58, "citation"], [59, "citation"], [60, "citation"]], "Configuring the FeatureBuilder": [[1, "configuring-the-featurebuilder"]], "Content Word Accommodation": [[31, null]], "Contents:": [[61, null]], "Conversation Parameters": [[1, "conversation-parameters"]], "Conversation-Level Aggregates": [[11, "conversation-level-aggregates"]], "Conversation-Level Features": [[11, "conversation-level-features"], [39, "conversation-level-features"]], "Conversational Repair": [[32, null]], "Cumulative Grouping": [[1, "cumulative-grouping"]], "Custom Aggregation": [[1, "custom-aggregation"]], "Custom Features": [[1, "custom-features"]], "Customizable Parameters": [[0, "customizable-parameters"]], "Dale-Chall Score": [[33, null]], "Declaring a FeatureBuilder": [[61, "declaring-a-featurebuilder"]], "Demo / Sample Code": [[0, "demo-sample-code"], [1, "demo-sample-code"]], "Discursive Diversity": [[34, null]], "Example Usage of 
Custom Aggregation Parameters": [[1, "example-usage-of-custom-aggregation-parameters"]], "Example:": [[41, "example"]], "FEATURE NAME": [[29, null]], "Feature Column Names": [[1, "feature-column-names"], [61, "feature-column-names"]], "Feature Documentation": [[62, "feature-documentation"]], "Feature Information": [[1, "feature-information"], [61, "feature-information"]], "Features: Conceptual Documentation": [[39, null]], "Features: Technical Documentation": [[11, null]], "Forward Flow": [[35, null]], "Function Word Accommodation": [[36, null]], "Generating Features: Utterance-, Speaker-, and Conversation-Level": [[62, "generating-features-utterance-speaker-and-conversation-level"]], "Generating Vectors using GPU": [[1, "generating-vectors-using-gpu"]], "Getting Started": [[1, "getting-started"], [61, "getting-started"], [62, "getting-started"]], "Gini Coefficient": [[37, null]], "Hedge": [[38, null]], "High*Level Intuition": [[54, "high-level-intuition"]], "High-Level Intuition": [[29, "high-level-intuition"], [30, "high-level-intuition"], [31, "high-level-intuition"], [32, "high-level-intuition"], [33, "high-level-intuition"], [34, "high-level-intuition"], [35, "high-level-intuition"], [36, "high-level-intuition"], [37, "high-level-intuition"], [38, "high-level-intuition"], [40, "high-level-intuition"], [41, "high-level-intuition"], [42, "high-level-intuition"], [43, "high-level-intuition"], [44, "high-level-intuition"], [45, "high-level-intuition"], [46, "high-level-intuition"], [47, "high-level-intuition"], [48, "high-level-intuition"], [49, "high-level-intuition"], [50, "high-level-intuition"], [51, "high-level-intuition"], [52, "high-level-intuition"], [53, "high-level-intuition"], [55, "high-level-intuition"], [56, "high-level-intuition"], [57, "high-level-intuition"], [58, "high-level-intuition"], [59, "high-level-intuition"], [60, "high-level-intuition"]], "Implementation": [[32, "implementation"], [42, "implementation"], [52, "implementation"], [54, 
"implementation"]], "Implementation Basics": [[29, "implementation-basics"], [30, "implementation-basics"], [31, "implementation-basics"], [33, "implementation-basics"], [34, "implementation-basics"], [35, "implementation-basics"], [36, "implementation-basics"], [37, "implementation-basics"], [38, "implementation-basics"], [40, "implementation-basics"], [41, "implementation-basics"], [43, "implementation-basics"], [44, "implementation-basics"], [45, "implementation-basics"], [46, "implementation-basics"], [47, "implementation-basics"], [48, "implementation-basics"], [49, "implementation-basics"], [50, "implementation-basics"], [51, "implementation-basics"], [53, "implementation-basics"], [55, "implementation-basics"], [56, "implementation-basics"], [57, "implementation-basics"], [58, "implementation-basics"], [59, "implementation-basics"], [60, "implementation-basics"]], "Implementation Notes/Caveats": [[29, "implementation-notes-caveats"], [30, "implementation-notes-caveats"], [31, "implementation-notes-caveats"], [33, "implementation-notes-caveats"], [34, "implementation-notes-caveats"], [35, "implementation-notes-caveats"], [36, "implementation-notes-caveats"], [38, "implementation-notes-caveats"], [40, "implementation-notes-caveats"], [41, "implementation-notes-caveats"], [43, "implementation-notes-caveats"], [44, "implementation-notes-caveats"], [45, "implementation-notes-caveats"], [46, "implementation-notes-caveats"], [47, "implementation-notes-caveats"], [48, "implementation-notes-caveats"], [49, "implementation-notes-caveats"], [50, "implementation-notes-caveats"], [51, "implementation-notes-caveats"], [53, "implementation-notes-caveats"], [55, "implementation-notes-caveats"], [56, "implementation-notes-caveats"], [57, "implementation-notes-caveats"], [58, "implementation-notes-caveats"], [59, "implementation-notes-caveats"]], "Import Recommendations: Virtual Environment and Pip": [[1, "import-recommendations-virtual-environment-and-pip"], [61, 
"import-recommendations-virtual-environment-and-pip"]], "Important Notes and Caveats": [[1, "important-notes-and-caveats"]], "Importing the Package": [[1, "importing-the-package"]], "Indices and Tables": [[61, "indices-and-tables"]], "Information Diversity": [[40, null]], "Information Exchange": [[41, null]], "Input File": [[34, "id2"]], "Inspecting Generated Features": [[1, "inspecting-generated-features"], [61, "inspecting-generated-features"]], "Interpretation:": [[41, "interpretation"]], "Interpreting the Feature": [[29, "interpreting-the-feature"], [30, "interpreting-the-feature"], [31, "interpreting-the-feature"], [32, "interpreting-the-feature"], [33, "interpreting-the-feature"], [34, "interpreting-the-feature"], [35, "interpreting-the-feature"], [36, "interpreting-the-feature"], [37, "interpreting-the-feature"], [38, "interpreting-the-feature"], [40, "interpreting-the-feature"], [41, "interpreting-the-feature"], [42, "interpreting-the-feature"], [43, "interpreting-the-feature"], [44, "interpreting-the-feature"], [45, "interpreting-the-feature"], [46, "interpreting-the-feature"], [47, "interpreting-the-feature"], [48, "interpreting-the-feature"], [49, "interpreting-the-feature"], [50, "interpreting-the-feature"], [51, "interpreting-the-feature"], [52, "interpreting-the-feature"], [53, "interpreting-the-feature"], [54, "interpreting-the-feature"], [55, "interpreting-the-feature"], [56, "interpreting-the-feature"], [57, "interpreting-the-feature"], [58, "interpreting-the-feature"], [59, "interpreting-the-feature"], [60, "interpreting-the-feature"]], "Introduction": [[62, null]], "Key Assumptions and Parameters": [[0, "key-assumptions-and-parameters"]], "Linguistic Inquiry and Word Count (LIWC) and Other Lexicons": [[42, null]], "Message Length": [[43, null]], "Message Quantity": [[44, null]], "Mimicry (BERT)": [[45, null]], "Motivation": [[62, "motivation"]], "Moving Mimicry": [[46, null]], "Named Entity Recognition": [[1, "named-entity-recognition"], [47, 
null]], "Named Entity Training Examples": [[47, "id2"]], "New in v.1.0.5: \u201cBring Your Own LIWC\u201d Custom Lexicon": [[42, "new-in-v-1-0-5-bring-your-own-liwc-custom-lexicon"]], "Online Discussion Tags": [[48, null]], "Other Utilities": [[69, "other-utilities"]], "Ouput File": [[34, "id3"]], "Our Team": [[62, "our-team"]], "Output File": [[30, "id2"], [35, "id2"], [45, "id2"], [46, "id2"], [47, "id3"], [51, "id1"]], "Output File Naming Details": [[1, "output-file-naming-details"]], "Package Assumptions": [[0, "package-assumptions"]], "Politeness Strategies": [[50, null]], "Politeness/Receptiveness Markers": [[49, null]], "Positivity Z-Score": [[52, null]], "Proportion of First Person Pronouns": [[53, null]], "Question (Naive)": [[54, null]], "Regenerating Vector Cache": [[1, "regenerating-vector-cache"]], "Related Features": [[29, "related-features"], [30, "related-features"], [31, "related-features"], [32, "related-features"], [33, "related-features"], [34, "related-features"], [35, "related-features"], [36, "related-features"], [37, "related-features"], [38, "related-features"], [40, "related-features"], [41, "related-features"], [42, "related-features"], [43, "related-features"], [44, "related-features"], [45, "related-features"], [46, "related-features"], [47, "related-features"], [48, "related-features"], [49, "related-features"], [50, "related-features"], [51, "related-features"], [52, "related-features"], [53, "related-features"], [54, "related-features"], [55, "related-features"], [56, "related-features"], [57, "related-features"], [58, "related-features"], [59, "related-features"], [60, "related-features"]], "Sentiment (RoBERTa)": [[51, null]], "Speaker Turn Counts": [[59, "id2"]], "Speaker- (User) Level Features": [[11, "speaker-user-level-features"]], "Table of Contents": [[61, "table-of-contents"]], "Team Burstiness": [[55, null]], "Textblob Polarity": [[56, null]], "Textblob Subjectivity": [[57, null]], "The Basics (Get Started Here!)": [[0, 
null]], "The FeatureBuilder": [[62, "the-featurebuilder"]], "The Team Communication Toolkit": [[61, null]], "Time Difference": [[58, null]], "Troubleshooting": [[1, "troubleshooting"], [61, "troubleshooting"]], "Turn Taking Index": [[59, null]], "Turns": [[1, "turns"]], "Using the Package": [[61, "using-the-package"]], "Utilities": [[69, null]], "Utterance- (Chat) Level Features": [[11, "utterance-chat-level-features"], [39, "utterance-chat-level-features"]], "Vector Directory": [[1, "vector-directory"]], "Walkthrough: Running the FeatureBuilder on Your Data": [[1, "walkthrough-running-the-featurebuilder-on-your-data"]], "Word Type-Token Ratio": [[60, null]], "Worked Example": [[1, null]], "assign_chunk_nums module": [[63, null]], "basic_features module": [[3, null]], "burstiness module": [[4, null]], "calculate_chat_level_features module": [[64, null]], "calculate_conversation_level_features module": [[65, null]], "calculate_user_level_features module": [[66, null]], "certainty module": [[5, null]], "check_embeddings module": [[67, null]], "discursive_diversity module": [[6, null]], "feature_builder module": [[2, null]], "fflow module": [[7, null]], "get_all_DD_features module": [[8, null]], "get_user_network module": [[9, null]], "gini_coefficient module": [[68, null]], "hedge module": [[10, null]], "info_exchange_zscore module": [[12, null]], "information_diversity module": [[13, null]], "lexical_features_v2 module": [[14, null]], "named_entity_recognition_features module": [[15, null]], "other_lexical_features module": [[16, null]], "politeness_features module": [[17, null]], "politeness_v2 module": [[18, null]], "politeness_v2_helper module": [[19, null]], "preload_word_lists module": [[70, null]], "preprocess module": [[71, null]], "question_num module": [[20, null]], "readability module": [[21, null]], "reddit_tags module": [[22, null]], "summarize_features module": [[72, null]], "temporal_features module": [[23, null]], "textblob_sentiment_analysis module": 
[[24, null]], "turn_taking_features module": [[25, null]], "variance_in_DD module": [[26, null]], "within_person_discursive_range module": [[27, null]], "word_mimicry module": [[28, null]], "z-scores:": [[41, "z-scores"]], "zscore_chats_and_conversation module": [[73, null]], "\u201cDriver\u201d Classes: Utterance-, Conversation-, and Speaker-Level Features": [[69, "driver-classes-utterance-conversation-and-speaker-level-features"]]}, "docnames": ["basics", "examples", "feature_builder", "features/basic_features", "features/burstiness", "features/certainty", "features/discursive_diversity", "features/fflow", "features/get_all_DD_features", "features/get_user_network", "features/hedge", "features/index", "features/info_exchange_zscore", "features/information_diversity", "features/lexical_features_v2", "features/named_entity_recognition_features", "features/other_lexical_features", "features/politeness_features", "features/politeness_v2", "features/politeness_v2_helper", "features/question_num", "features/readability", "features/reddit_tags", "features/temporal_features", "features/textblob_sentiment_analysis", "features/turn_taking_features", "features/variance_in_DD", "features/within_person_discursive_range", "features/word_mimicry", "features_conceptual/TEMPLATE", "features_conceptual/certainty", "features_conceptual/content_word_accommodation", "features_conceptual/conversational_repair", "features_conceptual/dale_chall_score", "features_conceptual/discursive_diversity", "features_conceptual/forward_flow", "features_conceptual/function_word_accommodation", "features_conceptual/gini_coefficient", "features_conceptual/hedge", "features_conceptual/index", "features_conceptual/information_diversity", "features_conceptual/information_exchange", "features_conceptual/liwc", "features_conceptual/message_length", "features_conceptual/message_quantity", "features_conceptual/mimicry_bert", "features_conceptual/moving_mimicry", 
"features_conceptual/named_entity_recognition", "features_conceptual/online_discussions_tags", "features_conceptual/politeness_receptiveness_markers", "features_conceptual/politeness_strategies", "features_conceptual/positivity_bert", "features_conceptual/positivity_z_score", "features_conceptual/proportion_of_first_person_pronouns", "features_conceptual/questions", "features_conceptual/team_burstiness", "features_conceptual/textblob_polarity", "features_conceptual/textblob_subjectivity", "features_conceptual/time_difference", "features_conceptual/turn_taking_index", "features_conceptual/word_ttr", "index", "intro", "utils/assign_chunk_nums", "utils/calculate_chat_level_features", "utils/calculate_conversation_level_features", "utils/calculate_user_level_features", "utils/check_embeddings", "utils/gini_coefficient", "utils/index", "utils/preload_word_lists", "utils/preprocess", "utils/summarize_features", "utils/zscore_chats_and_conversation"], "envversion": {"sphinx": 62, "sphinx.domains.c": 3, "sphinx.domains.changeset": 1, "sphinx.domains.citation": 1, "sphinx.domains.cpp": 9, "sphinx.domains.index": 1, "sphinx.domains.javascript": 3, "sphinx.domains.math": 2, "sphinx.domains.python": 4, "sphinx.domains.rst": 2, "sphinx.domains.std": 2}, "filenames": ["basics.rst", "examples.rst", "feature_builder.rst", "features/basic_features.rst", "features/burstiness.rst", "features/certainty.rst", "features/discursive_diversity.rst", "features/fflow.rst", "features/get_all_DD_features.rst", "features/get_user_network.rst", "features/hedge.rst", "features/index.rst", "features/info_exchange_zscore.rst", "features/information_diversity.rst", "features/lexical_features_v2.rst", "features/named_entity_recognition_features.rst", "features/other_lexical_features.rst", "features/politeness_features.rst", "features/politeness_v2.rst", "features/politeness_v2_helper.rst", "features/question_num.rst", "features/readability.rst", "features/reddit_tags.rst", 
"features/temporal_features.rst", "features/textblob_sentiment_analysis.rst", "features/turn_taking_features.rst", "features/variance_in_DD.rst", "features/within_person_discursive_range.rst", "features/word_mimicry.rst", "features_conceptual/TEMPLATE.rst", "features_conceptual/certainty.rst", "features_conceptual/content_word_accommodation.rst", "features_conceptual/conversational_repair.rst", "features_conceptual/dale_chall_score.rst", "features_conceptual/discursive_diversity.rst", "features_conceptual/forward_flow.rst", "features_conceptual/function_word_accommodation.rst", "features_conceptual/gini_coefficient.rst", "features_conceptual/hedge.rst", "features_conceptual/index.rst", "features_conceptual/information_diversity.rst", "features_conceptual/information_exchange.rst", "features_conceptual/liwc.rst", "features_conceptual/message_length.rst", "features_conceptual/message_quantity.rst", "features_conceptual/mimicry_bert.rst", "features_conceptual/moving_mimicry.rst", "features_conceptual/named_entity_recognition.rst", "features_conceptual/online_discussions_tags.rst", "features_conceptual/politeness_receptiveness_markers.rst", "features_conceptual/politeness_strategies.rst", "features_conceptual/positivity_bert.rst", "features_conceptual/positivity_z_score.rst", "features_conceptual/proportion_of_first_person_pronouns.rst", "features_conceptual/questions.rst", "features_conceptual/team_burstiness.rst", "features_conceptual/textblob_polarity.rst", "features_conceptual/textblob_subjectivity.rst", "features_conceptual/time_difference.rst", "features_conceptual/turn_taking_index.rst", "features_conceptual/word_ttr.rst", "index.rst", "intro.rst", "utils/assign_chunk_nums.rst", "utils/calculate_chat_level_features.rst", "utils/calculate_conversation_level_features.rst", "utils/calculate_user_level_features.rst", "utils/check_embeddings.rst", "utils/gini_coefficient.rst", "utils/index.rst", "utils/preload_word_lists.rst", "utils/preprocess.rst", 
"utils/summarize_features.rst", "utils/zscore_chats_and_conversation.rst"], "indexentries": {"adverb_limiter() (in module features.politeness_v2_helper)": [[19, "features.politeness_v2_helper.adverb_limiter", false]], "assign_chunk_nums() (in module utils.assign_chunk_nums)": [[63, "utils.assign_chunk_nums.assign_chunk_nums", false]], "bare_command() (in module features.politeness_v2_helper)": [[19, "features.politeness_v2_helper.bare_command", false]], "built_spacy_ner() (in module features.named_entity_recognition_features)": [[15, "features.named_entity_recognition_features.built_spacy_ner", false]], "burstiness() (in module features.burstiness)": [[4, "features.burstiness.burstiness", false]], "calculate_chat_level_features() (utils.calculate_chat_level_features.chatlevelfeaturescalculator method)": [[64, "utils.calculate_chat_level_features.ChatLevelFeaturesCalculator.calculate_chat_level_features", false]], "calculate_conversation_level_features() (utils.calculate_conversation_level_features.conversationlevelfeaturescalculator method)": [[65, "utils.calculate_conversation_level_features.ConversationLevelFeaturesCalculator.calculate_conversation_level_features", false]], "calculate_hedge_features() (utils.calculate_chat_level_features.chatlevelfeaturescalculator method)": [[64, "utils.calculate_chat_level_features.ChatLevelFeaturesCalculator.calculate_hedge_features", false]], "calculate_id_score() (in module features.information_diversity)": [[13, "features.information_diversity.calculate_ID_score", false]], "calculate_info_diversity() (utils.calculate_conversation_level_features.conversationlevelfeaturescalculator method)": [[65, "utils.calculate_conversation_level_features.ConversationLevelFeaturesCalculator.calculate_info_diversity", false]], "calculate_named_entities() (in module features.named_entity_recognition_features)": [[15, "features.named_entity_recognition_features.calculate_named_entities", false]], "calculate_num_question_naive() (in module 
features.question_num)": [[20, "features.question_num.calculate_num_question_naive", false]], "calculate_politeness_sentiment() (utils.calculate_chat_level_features.chatlevelfeaturescalculator method)": [[64, "utils.calculate_chat_level_features.ChatLevelFeaturesCalculator.calculate_politeness_sentiment", false]], "calculate_politeness_v2() (utils.calculate_chat_level_features.chatlevelfeaturescalculator method)": [[64, "utils.calculate_chat_level_features.ChatLevelFeaturesCalculator.calculate_politeness_v2", false]], "calculate_team_burstiness() (utils.calculate_conversation_level_features.conversationlevelfeaturescalculator method)": [[65, "utils.calculate_conversation_level_features.ConversationLevelFeaturesCalculator.calculate_team_burstiness", false]], "calculate_textblob_sentiment() (utils.calculate_chat_level_features.chatlevelfeaturescalculator method)": [[64, "utils.calculate_chat_level_features.ChatLevelFeaturesCalculator.calculate_textblob_sentiment", false]], "calculate_user_level_features() (utils.calculate_user_level_features.userlevelfeaturescalculator method)": [[66, "utils.calculate_user_level_features.UserLevelFeaturesCalculator.calculate_user_level_features", false]], "calculate_vector_word_mimicry() (utils.calculate_chat_level_features.chatlevelfeaturescalculator method)": [[64, "utils.calculate_chat_level_features.ChatLevelFeaturesCalculator.calculate_vector_word_mimicry", false]], "calculate_word_mimicry() (utils.calculate_chat_level_features.chatlevelfeaturescalculator method)": [[64, "utils.calculate_chat_level_features.ChatLevelFeaturesCalculator.calculate_word_mimicry", false]], "chat_level_features() (feature_builder.featurebuilder method)": [[2, "feature_builder.FeatureBuilder.chat_level_features", false]], "chatlevelfeaturescalculator (class in utils.calculate_chat_level_features)": [[64, "utils.calculate_chat_level_features.ChatLevelFeaturesCalculator", false]], "check_embeddings() (in module utils.check_embeddings)": [[67, 
"utils.check_embeddings.check_embeddings", false]], "classify_ntri() (in module features.other_lexical_features)": [[16, "features.other_lexical_features.classify_NTRI", false]], "classify_text_dalechall() (in module features.readability)": [[21, "features.readability.classify_text_dalechall", false]], "clean_text() (in module features.politeness_v2_helper)": [[19, "features.politeness_v2_helper.clean_text", false]], "commit_data() (in module features.politeness_v2_helper)": [[19, "features.politeness_v2_helper.commit_data", false]], "compress() (in module utils.preprocess)": [[71, "utils.preprocess.compress", false]], "compute_frequency() (in module features.word_mimicry)": [[28, "features.word_mimicry.compute_frequency", false]], "compute_frequency_per_conv() (in module features.word_mimicry)": [[28, "features.word_mimicry.compute_frequency_per_conv", false]], "computetf() (in module features.word_mimicry)": [[28, "features.word_mimicry.computeTF", false]], "concat_bert_features() (utils.calculate_chat_level_features.chatlevelfeaturescalculator method)": [[64, "utils.calculate_chat_level_features.ChatLevelFeaturesCalculator.concat_bert_features", false]], "conjection_seperator() (in module features.politeness_v2_helper)": [[19, "features.politeness_v2_helper.conjection_seperator", false]], "content_mimicry_score() (in module features.word_mimicry)": [[28, "features.word_mimicry.Content_mimicry_score", false]], "content_mimicry_score_per_conv() (in module features.word_mimicry)": [[28, "features.word_mimicry.Content_mimicry_score_per_conv", false]], "conv_level_features() (feature_builder.featurebuilder method)": [[2, "feature_builder.FeatureBuilder.conv_level_features", false]], "conv_to_float_arr() (in module features.get_all_dd_features)": [[8, "features.get_all_DD_features.conv_to_float_arr", false]], "conversationlevelfeaturescalculator (class in utils.calculate_conversation_level_features)": [[65, 
"utils.calculate_conversation_level_features.ConversationLevelFeaturesCalculator", false]], "count_all_caps() (in module features.reddit_tags)": [[22, "features.reddit_tags.count_all_caps", false]], "count_bullet_points() (in module features.reddit_tags)": [[22, "features.reddit_tags.count_bullet_points", false]], "count_characters() (in module features.basic_features)": [[3, "features.basic_features.count_characters", false]], "count_difficult_words() (in module features.readability)": [[21, "features.readability.count_difficult_words", false]], "count_ellipses() (in module features.reddit_tags)": [[22, "features.reddit_tags.count_ellipses", false]], "count_emojis() (in module features.reddit_tags)": [[22, "features.reddit_tags.count_emojis", false]], "count_emphasis() (in module features.reddit_tags)": [[22, "features.reddit_tags.count_emphasis", false]], "count_line_breaks() (in module features.reddit_tags)": [[22, "features.reddit_tags.count_line_breaks", false]], "count_links() (in module features.reddit_tags)": [[22, "features.reddit_tags.count_links", false]], "count_matches() (in module features.politeness_v2_helper)": [[19, "features.politeness_v2_helper.count_matches", false]], "count_messages() (in module features.basic_features)": [[3, "features.basic_features.count_messages", false]], "count_numbering() (in module features.reddit_tags)": [[22, "features.reddit_tags.count_numbering", false]], "count_parentheses() (in module features.reddit_tags)": [[22, "features.reddit_tags.count_parentheses", false]], "count_quotes() (in module features.reddit_tags)": [[22, "features.reddit_tags.count_quotes", false]], "count_responding_to_someone() (in module features.reddit_tags)": [[22, "features.reddit_tags.count_responding_to_someone", false]], "count_spacy_matches() (in module features.politeness_v2_helper)": [[19, "features.politeness_v2_helper.count_spacy_matches", false]], "count_syllables() (in module features.readability)": [[21, 
"features.readability.count_syllables", false]], "count_turn_taking_index() (in module features.turn_taking_features)": [[25, "features.turn_taking_features.count_turn_taking_index", false]], "count_turns() (in module features.turn_taking_features)": [[25, "features.turn_taking_features.count_turns", false]], "count_user_references() (in module features.reddit_tags)": [[22, "features.reddit_tags.count_user_references", false]], "count_words() (in module features.basic_features)": [[3, "features.basic_features.count_words", false]], "create_chunks() (in module utils.assign_chunk_nums)": [[63, "utils.assign_chunk_nums.create_chunks", false]], "create_chunks_messages() (in module utils.assign_chunk_nums)": [[63, "utils.assign_chunk_nums.create_chunks_messages", false]], "create_cumulative_rows() (in module utils.preprocess)": [[71, "utils.preprocess.create_cumulative_rows", false]], "dale_chall_helper() (in module features.readability)": [[21, "features.readability.dale_chall_helper", false]], "feat_counts() (in module features.politeness_v2_helper)": [[19, "features.politeness_v2_helper.feat_counts", false]], "feature_builder": [[2, "module-feature_builder", false]], "featurebuilder (class in feature_builder)": [[2, "feature_builder.FeatureBuilder", false]], "features.basic_features": [[3, "module-features.basic_features", false]], "features.burstiness": [[4, "module-features.burstiness", false]], "features.certainty": [[5, "module-features.certainty", false]], "features.discursive_diversity": [[6, "module-features.discursive_diversity", false]], "features.fflow": [[7, "module-features.fflow", false]], "features.get_all_dd_features": [[8, "module-features.get_all_DD_features", false]], "features.get_user_network": [[9, "module-features.get_user_network", false]], "features.hedge": [[10, "module-features.hedge", false]], "features.info_exchange_zscore": [[12, "module-features.info_exchange_zscore", false]], "features.information_diversity": [[13, 
"module-features.information_diversity", false]], "features.lexical_features_v2": [[14, "module-features.lexical_features_v2", false]], "features.named_entity_recognition_features": [[15, "module-features.named_entity_recognition_features", false]], "features.other_lexical_features": [[16, "module-features.other_lexical_features", false]], "features.politeness_features": [[17, "module-features.politeness_features", false]], "features.politeness_v2": [[18, "module-features.politeness_v2", false]], "features.politeness_v2_helper": [[19, "module-features.politeness_v2_helper", false]], "features.question_num": [[20, "module-features.question_num", false]], "features.readability": [[21, "module-features.readability", false]], "features.reddit_tags": [[22, "module-features.reddit_tags", false]], "features.temporal_features": [[23, "module-features.temporal_features", false]], "features.textblob_sentiment_analysis": [[24, "module-features.textblob_sentiment_analysis", false]], "features.turn_taking_features": [[25, "module-features.turn_taking_features", false]], "features.variance_in_dd": [[26, "module-features.variance_in_DD", false]], "features.within_person_discursive_range": [[27, "module-features.within_person_discursive_range", false]], "features.word_mimicry": [[28, "module-features.word_mimicry", false]], "featurize() (feature_builder.featurebuilder method)": [[2, "feature_builder.FeatureBuilder.featurize", false]], "fix_abbreviations() (in module utils.check_embeddings)": [[67, "utils.check_embeddings.fix_abbreviations", false]], "function_mimicry_score() (in module features.word_mimicry)": [[28, "features.word_mimicry.function_mimicry_score", false]], "generate_bert() (in module utils.check_embeddings)": [[67, "utils.check_embeddings.generate_bert", false]], "generate_certainty_pkl() (in module utils.check_embeddings)": [[67, "utils.check_embeddings.generate_certainty_pkl", false]], "generate_lexicon_pkl() (in module utils.check_embeddings)": [[67, 
"utils.check_embeddings.generate_lexicon_pkl", false]], "generate_vect() (in module utils.check_embeddings)": [[67, "utils.check_embeddings.generate_vect", false]], "get_centroids() (utils.calculate_user_level_features.userlevelfeaturescalculator method)": [[66, "utils.calculate_user_level_features.UserLevelFeaturesCalculator.get_centroids", false]], "get_certainty() (in module features.certainty)": [[5, "features.certainty.get_certainty", false]], "get_certainty_score() (utils.calculate_chat_level_features.chatlevelfeaturescalculator method)": [[64, "utils.calculate_chat_level_features.ChatLevelFeaturesCalculator.get_certainty_score", false]], "get_content_words_in_message() (in module features.word_mimicry)": [[28, "features.word_mimicry.get_content_words_in_message", false]], "get_conversation_level_aggregates() (utils.calculate_conversation_level_features.conversationlevelfeaturescalculator method)": [[65, "utils.calculate_conversation_level_features.ConversationLevelFeaturesCalculator.get_conversation_level_aggregates", false]], "get_cosine_similarity() (in module features.discursive_diversity)": [[6, "features.discursive_diversity.get_cosine_similarity", false]], "get_dale_chall_easy_words() (in module utils.preload_word_lists)": [[70, "utils.preload_word_lists.get_dale_chall_easy_words", false]], "get_dale_chall_score_and_classfication() (utils.calculate_chat_level_features.chatlevelfeaturescalculator method)": [[64, "utils.calculate_chat_level_features.ChatLevelFeaturesCalculator.get_dale_chall_score_and_classfication", false]], "get_dd() (in module features.discursive_diversity)": [[6, "features.discursive_diversity.get_DD", false]], "get_dd_features() (in module features.get_all_dd_features)": [[8, "features.get_all_DD_features.get_DD_features", false]], "get_dep_pairs() (in module features.politeness_v2_helper)": [[19, "features.politeness_v2_helper.get_dep_pairs", false]], "get_dep_pairs_noneg() (in module features.politeness_v2_helper)": [[19, 
"features.politeness_v2_helper.get_dep_pairs_noneg", false]], "get_discursive_diversity_features() (utils.calculate_conversation_level_features.conversationlevelfeaturescalculator method)": [[65, "utils.calculate_conversation_level_features.ConversationLevelFeaturesCalculator.get_discursive_diversity_features", false]], "get_first_pct_of_chat() (feature_builder.featurebuilder method)": [[2, "feature_builder.FeatureBuilder.get_first_pct_of_chat", false]], "get_first_person_words() (in module utils.preload_word_lists)": [[70, "utils.preload_word_lists.get_first_person_words", false]], "get_forward_flow() (in module features.fflow)": [[7, "features.fflow.get_forward_flow", false]], "get_forward_flow() (utils.calculate_chat_level_features.chatlevelfeaturescalculator method)": [[64, "utils.calculate_chat_level_features.ChatLevelFeaturesCalculator.get_forward_flow", false]], "get_function_words() (in module utils.preload_word_lists)": [[70, "utils.preload_word_lists.get_function_words", false]], "get_function_words_in_message() (in module features.word_mimicry)": [[28, "features.word_mimicry.get_function_words_in_message", false]], "get_gini() (in module utils.gini_coefficient)": [[68, "utils.gini_coefficient.get_gini", false]], "get_gini_features() (utils.calculate_conversation_level_features.conversationlevelfeaturescalculator method)": [[65, "utils.calculate_conversation_level_features.ConversationLevelFeaturesCalculator.get_gini_features", false]], "get_info_diversity() (in module features.information_diversity)": [[13, "features.information_diversity.get_info_diversity", false]], "get_info_exchange_wordcount() (in module features.info_exchange_zscore)": [[12, "features.info_exchange_zscore.get_info_exchange_wordcount", false]], "get_liwc_count() (in module features.lexical_features_v2)": [[14, "features.lexical_features_v2.get_liwc_count", false]], "get_max() (in module utils.summarize_features)": [[72, "utils.summarize_features.get_max", false]], "get_mean() (in 
module utils.summarize_features)": [[72, "utils.summarize_features.get_mean", false]], "get_median() (in module utils.summarize_features)": [[72, "utils.summarize_features.get_median", false]], "get_mimicry_bert() (in module features.word_mimicry)": [[28, "features.word_mimicry.get_mimicry_bert", false]], "get_min() (in module utils.summarize_features)": [[72, "utils.summarize_features.get_min", false]], "get_moving_mimicry() (in module features.word_mimicry)": [[28, "features.word_mimicry.get_moving_mimicry", false]], "get_named_entity() (utils.calculate_chat_level_features.chatlevelfeaturescalculator method)": [[64, "utils.calculate_chat_level_features.ChatLevelFeaturesCalculator.get_named_entity", false]], "get_nan_vector() (in module features.within_person_discursive_range)": [[27, "features.within_person_discursive_range.get_nan_vector", false]], "get_nan_vector() (in module utils.check_embeddings)": [[67, "utils.check_embeddings.get_nan_vector", false]], "get_polarity_score() (in module features.textblob_sentiment_analysis)": [[24, "features.textblob_sentiment_analysis.get_polarity_score", false]], "get_politeness_strategies() (in module features.politeness_features)": [[17, "features.politeness_features.get_politeness_strategies", false]], "get_politeness_v2() (in module features.politeness_v2)": [[18, "features.politeness_v2.get_politeness_v2", false]], "get_proportion_first_pronouns() (in module features.other_lexical_features)": [[16, "features.other_lexical_features.get_proportion_first_pronouns", false]], "get_question_words() (in module utils.preload_word_lists)": [[70, "utils.preload_word_lists.get_question_words", false]], "get_reddit_features() (utils.calculate_chat_level_features.chatlevelfeaturescalculator method)": [[64, "utils.calculate_chat_level_features.ChatLevelFeaturesCalculator.get_reddit_features", false]], "get_sentiment() (in module utils.check_embeddings)": [[67, "utils.check_embeddings.get_sentiment", false]], "get_stdev() (in module 
utils.summarize_features)": [[72, "utils.summarize_features.get_stdev", false]], "get_subjectivity_score() (in module features.textblob_sentiment_analysis)": [[24, "features.textblob_sentiment_analysis.get_subjectivity_score", false]], "get_sum() (in module utils.summarize_features)": [[72, "utils.summarize_features.get_sum", false]], "get_team_burstiness() (in module features.burstiness)": [[4, "features.burstiness.get_team_burstiness", false]], "get_temporal_features() (utils.calculate_chat_level_features.chatlevelfeaturescalculator method)": [[64, "utils.calculate_chat_level_features.ChatLevelFeaturesCalculator.get_temporal_features", false]], "get_time_diff() (in module features.temporal_features)": [[23, "features.temporal_features.get_time_diff", false]], "get_time_diff_startend() (in module features.temporal_features)": [[23, "features.temporal_features.get_time_diff_startend", false]], "get_turn() (in module features.turn_taking_features)": [[25, "features.turn_taking_features.get_turn", false]], "get_turn_id() (in module utils.preprocess)": [[71, "utils.preprocess.get_turn_id", false]], "get_turn_taking_features() (utils.calculate_conversation_level_features.conversationlevelfeaturescalculator method)": [[65, "utils.calculate_conversation_level_features.ConversationLevelFeaturesCalculator.get_turn_taking_features", false]], "get_unique_pairwise_combos() (in module features.discursive_diversity)": [[6, "features.discursive_diversity.get_unique_pairwise_combos", false]], "get_user_level_aggregates() (utils.calculate_conversation_level_features.conversationlevelfeaturescalculator method)": [[65, "utils.calculate_conversation_level_features.ConversationLevelFeaturesCalculator.get_user_level_aggregates", false]], "get_user_level_summary_statistics_features() (utils.calculate_user_level_features.userlevelfeaturescalculator method)": [[66, "utils.calculate_user_level_features.UserLevelFeaturesCalculator.get_user_level_summary_statistics_features", false]], 
"get_user_level_summed_features() (utils.calculate_user_level_features.userlevelfeaturescalculator method)": [[66, "utils.calculate_user_level_features.UserLevelFeaturesCalculator.get_user_level_summed_features", false]], "get_user_max_dataframe() (in module utils.summarize_features)": [[72, "utils.summarize_features.get_user_max_dataframe", false]], "get_user_mean_dataframe() (in module utils.summarize_features)": [[72, "utils.summarize_features.get_user_mean_dataframe", false]], "get_user_median_dataframe() (in module utils.summarize_features)": [[72, "utils.summarize_features.get_user_median_dataframe", false]], "get_user_min_dataframe() (in module utils.summarize_features)": [[72, "utils.summarize_features.get_user_min_dataframe", false]], "get_user_network() (in module features.get_user_network)": [[9, "features.get_user_network.get_user_network", false]], "get_user_network() (utils.calculate_user_level_features.userlevelfeaturescalculator method)": [[66, "utils.calculate_user_level_features.UserLevelFeaturesCalculator.get_user_network", false]], "get_user_stdev_dataframe() (in module utils.summarize_features)": [[72, "utils.summarize_features.get_user_stdev_dataframe", false]], "get_user_sum_dataframe() (in module utils.summarize_features)": [[72, "utils.summarize_features.get_user_sum_dataframe", false]], "get_variance_in_dd() (in module features.variance_in_dd)": [[26, "features.variance_in_DD.get_variance_in_DD", false]], "get_within_person_disc_range() (in module features.within_person_discursive_range)": [[27, "features.within_person_discursive_range.get_within_person_disc_range", false]], "get_word_ttr() (in module features.other_lexical_features)": [[16, "features.other_lexical_features.get_word_TTR", false]], "get_zscore_across_all_chats() (in module utils.zscore_chats_and_conversation)": [[73, "utils.zscore_chats_and_conversation.get_zscore_across_all_chats", false]], "get_zscore_across_all_conversations() (in module 
utils.zscore_chats_and_conversation)": [[73, "utils.zscore_chats_and_conversation.get_zscore_across_all_conversations", false]], "gini_coefficient() (in module utils.gini_coefficient)": [[68, "utils.gini_coefficient.gini_coefficient", false]], "info_diversity() (in module features.information_diversity)": [[13, "features.information_diversity.info_diversity", false]], "info_exchange() (utils.calculate_chat_level_features.chatlevelfeaturescalculator method)": [[64, "utils.calculate_chat_level_features.ChatLevelFeaturesCalculator.info_exchange", false]], "is_hedged_sentence_1() (in module features.hedge)": [[10, "features.hedge.is_hedged_sentence_1", false]], "is_valid_term() (in module utils.check_embeddings)": [[67, "utils.check_embeddings.is_valid_term", false]], "lexical_features() (utils.calculate_chat_level_features.chatlevelfeaturescalculator method)": [[64, "utils.calculate_chat_level_features.ChatLevelFeaturesCalculator.lexical_features", false]], "liwc_features() (in module features.lexical_features_v2)": [[14, "features.lexical_features_v2.liwc_features", false]], "load_custem_liwc_dict() (feature_builder.featurebuilder method)": [[2, "feature_builder.FeatureBuilder.load_custem_liwc_dict", false]], "load_liwc_dict() (in module utils.check_embeddings)": [[67, "utils.check_embeddings.load_liwc_dict", false]], "load_saved_data() (in module features.politeness_v2_helper)": [[19, "features.politeness_v2_helper.load_saved_data", false]], "load_to_dict() (in module features.politeness_v2_helper)": [[19, "features.politeness_v2_helper.load_to_dict", false]], "load_to_lists() (in module features.politeness_v2_helper)": [[19, "features.politeness_v2_helper.load_to_lists", false]], "merge_conv_data_with_original() (feature_builder.featurebuilder method)": [[2, "feature_builder.FeatureBuilder.merge_conv_data_with_original", false]], "mimic_words() (in module features.word_mimicry)": [[28, "features.word_mimicry.mimic_words", false]], "module": [[2, 
"module-feature_builder", false], [3, "module-features.basic_features", false], [4, "module-features.burstiness", false], [5, "module-features.certainty", false], [6, "module-features.discursive_diversity", false], [7, "module-features.fflow", false], [8, "module-features.get_all_DD_features", false], [9, "module-features.get_user_network", false], [10, "module-features.hedge", false], [12, "module-features.info_exchange_zscore", false], [13, "module-features.information_diversity", false], [14, "module-features.lexical_features_v2", false], [15, "module-features.named_entity_recognition_features", false], [16, "module-features.other_lexical_features", false], [17, "module-features.politeness_features", false], [18, "module-features.politeness_v2", false], [19, "module-features.politeness_v2_helper", false], [20, "module-features.question_num", false], [21, "module-features.readability", false], [22, "module-features.reddit_tags", false], [23, "module-features.temporal_features", false], [24, "module-features.textblob_sentiment_analysis", false], [25, "module-features.turn_taking_features", false], [26, "module-features.variance_in_DD", false], [27, "module-features.within_person_discursive_range", false], [28, "module-features.word_mimicry", false], [63, "module-utils.assign_chunk_nums", false], [64, "module-utils.calculate_chat_level_features", false], [65, "module-utils.calculate_conversation_level_features", false], [66, "module-utils.calculate_user_level_features", false], [67, "module-utils.check_embeddings", false], [68, "module-utils.gini_coefficient", false], [70, "module-utils.preload_word_lists", false], [71, "module-utils.preprocess", false], [72, "module-utils.summarize_features", false], [73, "module-utils.zscore_chats_and_conversation", false]], "named_entities() (in module features.named_entity_recognition_features)": [[15, "features.named_entity_recognition_features.named_entities", false]], "num_named_entity() (in module 
features.named_entity_recognition_features)": [[15, "features.named_entity_recognition_features.num_named_entity", false]], "other_lexical_features() (utils.calculate_chat_level_features.chatlevelfeaturescalculator method)": [[64, "utils.calculate_chat_level_features.ChatLevelFeaturesCalculator.other_lexical_features", false]], "phrase_split() (in module features.politeness_v2_helper)": [[19, "features.politeness_v2_helper.phrase_split", false]], "positivity_zscore() (utils.calculate_chat_level_features.chatlevelfeaturescalculator method)": [[64, "utils.calculate_chat_level_features.ChatLevelFeaturesCalculator.positivity_zscore", false]], "prep_simple() (in module features.politeness_v2_helper)": [[19, "features.politeness_v2_helper.prep_simple", false]], "prep_whole() (in module features.politeness_v2_helper)": [[19, "features.politeness_v2_helper.prep_whole", false]], "preprocess_chat_data() (feature_builder.featurebuilder method)": [[2, "feature_builder.FeatureBuilder.preprocess_chat_data", false]], "preprocess_conversation_columns() (in module utils.preprocess)": [[71, "utils.preprocess.preprocess_conversation_columns", false]], "preprocess_naive_turns() (in module utils.preprocess)": [[71, "utils.preprocess.preprocess_naive_turns", false]], "preprocess_text() (in module utils.preprocess)": [[71, "utils.preprocess.preprocess_text", false]], "preprocess_text_lowercase_but_retain_punctuation() (in module utils.preprocess)": [[71, "utils.preprocess.preprocess_text_lowercase_but_retain_punctuation", false]], "preprocessing() (in module features.information_diversity)": [[13, "features.information_diversity.preprocessing", false]], "punctuation_seperator() (in module features.politeness_v2_helper)": [[19, "features.politeness_v2_helper.punctuation_seperator", false]], "question() (in module features.politeness_v2_helper)": [[19, "features.politeness_v2_helper.Question", false]], "read_in_lexicons() (in module utils.check_embeddings)": [[67, 
"utils.check_embeddings.read_in_lexicons", false]], "reduce_chunks() (in module utils.assign_chunk_nums)": [[63, "utils.assign_chunk_nums.reduce_chunks", false]], "remove_active_user() (in module features.get_user_network)": [[9, "features.get_user_network.remove_active_user", false]], "remove_unhashable_cols() (in module utils.preprocess)": [[71, "utils.preprocess.remove_unhashable_cols", false]], "save_features() (feature_builder.featurebuilder method)": [[2, "feature_builder.FeatureBuilder.save_features", false]], "sentence_pad() (in module features.politeness_v2_helper)": [[19, "features.politeness_v2_helper.sentence_pad", false]], "sentence_split() (in module features.politeness_v2_helper)": [[19, "features.politeness_v2_helper.sentence_split", false]], "sentenciser() (in module features.politeness_v2_helper)": [[19, "features.politeness_v2_helper.sentenciser", false]], "set_self_conv_data() (feature_builder.featurebuilder method)": [[2, "feature_builder.FeatureBuilder.set_self_conv_data", false]], "sort_words() (in module utils.check_embeddings)": [[67, "utils.check_embeddings.sort_words", false]], "str_to_vec() (in module utils.check_embeddings)": [[67, "utils.check_embeddings.str_to_vec", false]], "text_based_features() (utils.calculate_chat_level_features.chatlevelfeaturescalculator method)": [[64, "utils.calculate_chat_level_features.ChatLevelFeaturesCalculator.text_based_features", false]], "token_count() (in module features.politeness_v2_helper)": [[19, "features.politeness_v2_helper.token_count", false]], "train_spacy_ner() (in module features.named_entity_recognition_features)": [[15, "features.named_entity_recognition_features.train_spacy_ner", false]], "user_level_features() (feature_builder.featurebuilder method)": [[2, "feature_builder.FeatureBuilder.user_level_features", false]], "userlevelfeaturescalculator (class in utils.calculate_user_level_features)": [[66, "utils.calculate_user_level_features.UserLevelFeaturesCalculator", false]], 
"utils.assign_chunk_nums": [[63, "module-utils.assign_chunk_nums", false]], "utils.calculate_chat_level_features": [[64, "module-utils.calculate_chat_level_features", false]], "utils.calculate_conversation_level_features": [[65, "module-utils.calculate_conversation_level_features", false]], "utils.calculate_user_level_features": [[66, "module-utils.calculate_user_level_features", false]], "utils.check_embeddings": [[67, "module-utils.check_embeddings", false]], "utils.gini_coefficient": [[68, "module-utils.gini_coefficient", false]], "utils.preload_word_lists": [[70, "module-utils.preload_word_lists", false]], "utils.preprocess": [[71, "module-utils.preprocess", false]], "utils.summarize_features": [[72, "module-utils.summarize_features", false]], "utils.zscore_chats_and_conversation": [[73, "module-utils.zscore_chats_and_conversation", false]], "verify_timestamp_format() (feature_builder.featurebuilder method)": [[2, "feature_builder.FeatureBuilder.verify_timestamp_format", false]], "word_start() (in module features.politeness_v2_helper)": [[19, "features.politeness_v2_helper.word_start", false]]}, "objects": {"": [[2, 0, 0, "-", "feature_builder"]], "feature_builder": [[2, 1, 1, "", "FeatureBuilder"]], "feature_builder.FeatureBuilder": [[2, 2, 1, "", "chat_level_features"], [2, 2, 1, "", "conv_level_features"], [2, 2, 1, "", "featurize"], [2, 2, 1, "", "get_first_pct_of_chat"], [2, 2, 1, "", "load_custem_liwc_dict"], [2, 2, 1, "", "merge_conv_data_with_original"], [2, 2, 1, "", "preprocess_chat_data"], [2, 2, 1, "", "save_features"], [2, 2, 1, "", "set_self_conv_data"], [2, 2, 1, "", "user_level_features"], [2, 2, 1, "", "verify_timestamp_format"]], "features": [[3, 0, 0, "-", "basic_features"], [4, 0, 0, "-", "burstiness"], [5, 0, 0, "-", "certainty"], [6, 0, 0, "-", "discursive_diversity"], [7, 0, 0, "-", "fflow"], [8, 0, 0, "-", "get_all_DD_features"], [9, 0, 0, "-", "get_user_network"], [10, 0, 0, "-", "hedge"], [12, 0, 0, "-", "info_exchange_zscore"], [13, 
0, 0, "-", "information_diversity"], [14, 0, 0, "-", "lexical_features_v2"], [15, 0, 0, "-", "named_entity_recognition_features"], [16, 0, 0, "-", "other_lexical_features"], [17, 0, 0, "-", "politeness_features"], [18, 0, 0, "-", "politeness_v2"], [19, 0, 0, "-", "politeness_v2_helper"], [20, 0, 0, "-", "question_num"], [21, 0, 0, "-", "readability"], [22, 0, 0, "-", "reddit_tags"], [23, 0, 0, "-", "temporal_features"], [24, 0, 0, "-", "textblob_sentiment_analysis"], [25, 0, 0, "-", "turn_taking_features"], [26, 0, 0, "-", "variance_in_DD"], [27, 0, 0, "-", "within_person_discursive_range"], [28, 0, 0, "-", "word_mimicry"]], "features.basic_features": [[3, 3, 1, "", "count_characters"], [3, 3, 1, "", "count_messages"], [3, 3, 1, "", "count_words"]], "features.burstiness": [[4, 3, 1, "", "burstiness"], [4, 3, 1, "", "get_team_burstiness"]], "features.certainty": [[5, 3, 1, "", "get_certainty"]], "features.discursive_diversity": [[6, 3, 1, "", "get_DD"], [6, 3, 1, "", "get_cosine_similarity"], [6, 3, 1, "", "get_unique_pairwise_combos"]], "features.fflow": [[7, 3, 1, "", "get_forward_flow"]], "features.get_all_DD_features": [[8, 3, 1, "", "conv_to_float_arr"], [8, 3, 1, "", "get_DD_features"]], "features.get_user_network": [[9, 3, 1, "", "get_user_network"], [9, 3, 1, "", "remove_active_user"]], "features.hedge": [[10, 3, 1, "", "is_hedged_sentence_1"]], "features.info_exchange_zscore": [[12, 3, 1, "", "get_info_exchange_wordcount"]], "features.information_diversity": [[13, 3, 1, "", "calculate_ID_score"], [13, 3, 1, "", "get_info_diversity"], [13, 3, 1, "", "info_diversity"], [13, 3, 1, "", "preprocessing"]], "features.lexical_features_v2": [[14, 3, 1, "", "get_liwc_count"], [14, 3, 1, "", "liwc_features"]], "features.named_entity_recognition_features": [[15, 3, 1, "", "built_spacy_ner"], [15, 3, 1, "", "calculate_named_entities"], [15, 3, 1, "", "named_entities"], [15, 3, 1, "", "num_named_entity"], [15, 3, 1, "", "train_spacy_ner"]], 
"features.other_lexical_features": [[16, 3, 1, "", "classify_NTRI"], [16, 3, 1, "", "get_proportion_first_pronouns"], [16, 3, 1, "", "get_word_TTR"]], "features.politeness_features": [[17, 3, 1, "", "get_politeness_strategies"]], "features.politeness_v2": [[18, 3, 1, "", "get_politeness_v2"]], "features.politeness_v2_helper": [[19, 3, 1, "", "Question"], [19, 3, 1, "", "adverb_limiter"], [19, 3, 1, "", "bare_command"], [19, 3, 1, "", "clean_text"], [19, 3, 1, "", "commit_data"], [19, 3, 1, "", "conjection_seperator"], [19, 3, 1, "", "count_matches"], [19, 3, 1, "", "count_spacy_matches"], [19, 3, 1, "", "feat_counts"], [19, 3, 1, "", "get_dep_pairs"], [19, 3, 1, "", "get_dep_pairs_noneg"], [19, 3, 1, "", "load_saved_data"], [19, 3, 1, "", "load_to_dict"], [19, 3, 1, "", "load_to_lists"], [19, 3, 1, "", "phrase_split"], [19, 3, 1, "", "prep_simple"], [19, 3, 1, "", "prep_whole"], [19, 3, 1, "", "punctuation_seperator"], [19, 3, 1, "", "sentence_pad"], [19, 3, 1, "", "sentence_split"], [19, 3, 1, "", "sentenciser"], [19, 3, 1, "", "token_count"], [19, 3, 1, "", "word_start"]], "features.question_num": [[20, 3, 1, "", "calculate_num_question_naive"]], "features.readability": [[21, 3, 1, "", "classify_text_dalechall"], [21, 3, 1, "", "count_difficult_words"], [21, 3, 1, "", "count_syllables"], [21, 3, 1, "", "dale_chall_helper"]], "features.reddit_tags": [[22, 3, 1, "", "count_all_caps"], [22, 3, 1, "", "count_bullet_points"], [22, 3, 1, "", "count_ellipses"], [22, 3, 1, "", "count_emojis"], [22, 3, 1, "", "count_emphasis"], [22, 3, 1, "", "count_line_breaks"], [22, 3, 1, "", "count_links"], [22, 3, 1, "", "count_numbering"], [22, 3, 1, "", "count_parentheses"], [22, 3, 1, "", "count_quotes"], [22, 3, 1, "", "count_responding_to_someone"], [22, 3, 1, "", "count_user_references"]], "features.temporal_features": [[23, 3, 1, "", "get_time_diff"], [23, 3, 1, "", "get_time_diff_startend"]], "features.textblob_sentiment_analysis": [[24, 3, 1, "", "get_polarity_score"], [24, 
3, 1, "", "get_subjectivity_score"]], "features.turn_taking_features": [[25, 3, 1, "", "count_turn_taking_index"], [25, 3, 1, "", "count_turns"], [25, 3, 1, "", "get_turn"]], "features.variance_in_DD": [[26, 3, 1, "", "get_variance_in_DD"]], "features.within_person_discursive_range": [[27, 3, 1, "", "get_nan_vector"], [27, 3, 1, "", "get_within_person_disc_range"]], "features.word_mimicry": [[28, 3, 1, "", "Content_mimicry_score"], [28, 3, 1, "", "Content_mimicry_score_per_conv"], [28, 3, 1, "", "computeTF"], [28, 3, 1, "", "compute_frequency"], [28, 3, 1, "", "compute_frequency_per_conv"], [28, 3, 1, "", "function_mimicry_score"], [28, 3, 1, "", "get_content_words_in_message"], [28, 3, 1, "", "get_function_words_in_message"], [28, 3, 1, "", "get_mimicry_bert"], [28, 3, 1, "", "get_moving_mimicry"], [28, 3, 1, "", "mimic_words"]], "utils": [[63, 0, 0, "-", "assign_chunk_nums"], [64, 0, 0, "-", "calculate_chat_level_features"], [65, 0, 0, "-", "calculate_conversation_level_features"], [66, 0, 0, "-", "calculate_user_level_features"], [67, 0, 0, "-", "check_embeddings"], [68, 0, 0, "-", "gini_coefficient"], [70, 0, 0, "-", "preload_word_lists"], [71, 0, 0, "-", "preprocess"], [72, 0, 0, "-", "summarize_features"], [73, 0, 0, "-", "zscore_chats_and_conversation"]], "utils.assign_chunk_nums": [[63, 3, 1, "", "assign_chunk_nums"], [63, 3, 1, "", "create_chunks"], [63, 3, 1, "", "create_chunks_messages"], [63, 3, 1, "", "reduce_chunks"]], "utils.calculate_chat_level_features": [[64, 1, 1, "", "ChatLevelFeaturesCalculator"]], "utils.calculate_chat_level_features.ChatLevelFeaturesCalculator": [[64, 2, 1, "", "calculate_chat_level_features"], [64, 2, 1, "", "calculate_hedge_features"], [64, 2, 1, "", "calculate_politeness_sentiment"], [64, 2, 1, "", "calculate_politeness_v2"], [64, 2, 1, "", "calculate_textblob_sentiment"], [64, 2, 1, "", "calculate_vector_word_mimicry"], [64, 2, 1, "", "calculate_word_mimicry"], [64, 2, 1, "", "concat_bert_features"], [64, 2, 1, "", 
"get_certainty_score"], [64, 2, 1, "", "get_dale_chall_score_and_classfication"], [64, 2, 1, "", "get_forward_flow"], [64, 2, 1, "", "get_named_entity"], [64, 2, 1, "", "get_reddit_features"], [64, 2, 1, "", "get_temporal_features"], [64, 2, 1, "", "info_exchange"], [64, 2, 1, "", "lexical_features"], [64, 2, 1, "", "other_lexical_features"], [64, 2, 1, "", "positivity_zscore"], [64, 2, 1, "", "text_based_features"]], "utils.calculate_conversation_level_features": [[65, 1, 1, "", "ConversationLevelFeaturesCalculator"]], "utils.calculate_conversation_level_features.ConversationLevelFeaturesCalculator": [[65, 2, 1, "", "calculate_conversation_level_features"], [65, 2, 1, "", "calculate_info_diversity"], [65, 2, 1, "", "calculate_team_burstiness"], [65, 2, 1, "", "get_conversation_level_aggregates"], [65, 2, 1, "", "get_discursive_diversity_features"], [65, 2, 1, "", "get_gini_features"], [65, 2, 1, "", "get_turn_taking_features"], [65, 2, 1, "", "get_user_level_aggregates"]], "utils.calculate_user_level_features": [[66, 1, 1, "", "UserLevelFeaturesCalculator"]], "utils.calculate_user_level_features.UserLevelFeaturesCalculator": [[66, 2, 1, "", "calculate_user_level_features"], [66, 2, 1, "", "get_centroids"], [66, 2, 1, "", "get_user_level_summary_statistics_features"], [66, 2, 1, "", "get_user_level_summed_features"], [66, 2, 1, "", "get_user_network"]], "utils.check_embeddings": [[67, 3, 1, "", "check_embeddings"], [67, 3, 1, "", "fix_abbreviations"], [67, 3, 1, "", "generate_bert"], [67, 3, 1, "", "generate_certainty_pkl"], [67, 3, 1, "", "generate_lexicon_pkl"], [67, 3, 1, "", "generate_vect"], [67, 3, 1, "", "get_nan_vector"], [67, 3, 1, "", "get_sentiment"], [67, 3, 1, "", "is_valid_term"], [67, 3, 1, "", "load_liwc_dict"], [67, 3, 1, "", "read_in_lexicons"], [67, 3, 1, "", "sort_words"], [67, 3, 1, "", "str_to_vec"]], "utils.gini_coefficient": [[68, 3, 1, "", "get_gini"], [68, 3, 1, "", "gini_coefficient"]], "utils.preload_word_lists": [[70, 3, 1, "", 
"get_dale_chall_easy_words"], [70, 3, 1, "", "get_first_person_words"], [70, 3, 1, "", "get_function_words"], [70, 3, 1, "", "get_question_words"]], "utils.preprocess": [[71, 3, 1, "", "compress"], [71, 3, 1, "", "create_cumulative_rows"], [71, 3, 1, "", "get_turn_id"], [71, 3, 1, "", "preprocess_conversation_columns"], [71, 3, 1, "", "preprocess_naive_turns"], [71, 3, 1, "", "preprocess_text"], [71, 3, 1, "", "preprocess_text_lowercase_but_retain_punctuation"], [71, 3, 1, "", "remove_unhashable_cols"]], "utils.summarize_features": [[72, 3, 1, "", "get_max"], [72, 3, 1, "", "get_mean"], [72, 3, 1, "", "get_median"], [72, 3, 1, "", "get_min"], [72, 3, 1, "", "get_stdev"], [72, 3, 1, "", "get_sum"], [72, 3, 1, "", "get_user_max_dataframe"], [72, 3, 1, "", "get_user_mean_dataframe"], [72, 3, 1, "", "get_user_median_dataframe"], [72, 3, 1, "", "get_user_min_dataframe"], [72, 3, 1, "", "get_user_stdev_dataframe"], [72, 3, 1, "", "get_user_sum_dataframe"]], "utils.zscore_chats_and_conversation": [[73, 3, 1, "", "get_zscore_across_all_chats"], [73, 3, 1, "", "get_zscore_across_all_conversations"]]}, "objnames": {"0": ["py", "module", "Python module"], "1": ["py", "class", "Python class"], "2": ["py", "method", "Python method"], "3": ["py", "function", "Python function"]}, "objtypes": {"0": "py:module", "1": "py:class", "2": "py:method", "3": "py:function"}, "terms": {"": [0, 1, 2, 4, 5, 9, 11, 13, 14, 25, 28, 29, 31, 32, 34, 35, 36, 40, 41, 42, 43, 44, 45, 46, 47, 49, 50, 51, 52, 55, 59, 61, 62, 64, 65, 66], "0": [0, 1, 2, 5, 10, 13, 16, 21, 24, 30, 31, 32, 33, 34, 35, 36, 37, 38, 40, 41, 43, 45, 46, 47, 50, 51, 53, 55, 59, 61], "00222437221134802": [5, 64], "01": 51, "02": 51, "04": 40, "0496": [21, 33], "05": [13, 40, 50, 51], "06": 51, "08": [42, 50], "09": [45, 46, 50], "1": [0, 1, 2, 3, 10, 13, 22, 24, 32, 34, 35, 37, 38, 40, 41, 43, 44, 45, 46, 47, 48, 51, 53, 55, 56, 57, 59, 61, 62, 67], "10": [1, 5, 6, 21, 24, 33, 59, 61, 64], "100": [1, 21, 33, 37, 42, 47, 62], 
"10th": 33, "1145": [21, 24], "1177": [5, 64], "11th": 33, "12": [35, 45, 46, 50], "1287": 6, "12th": 33, "13": 50, "14": 50, "15": [1, 37, 50], "1579": [21, 33], "17": 50, "1948": 33, "195": 36, "1977": 62, "1d": 67, "1lpngokujsx": 5, "1st": 50, "1st_person": 50, "1st_person_pl": 50, "1st_person_start": 50, "2": [0, 1, 2, 34, 35, 41, 47, 59, 61, 62, 67], "20": [37, 59], "2004": 42, "2007": [0, 5, 42, 67], "2009": 60, "2012": 55, "2013": [12, 16, 31, 32, 36, 37, 38, 41, 43, 49, 50, 52, 54, 70], "2015": [42, 53, 58, 60, 67], "2016": 4, "2017": 13, "2018": [40, 44, 55], "2019": [35, 52], "2020": [18, 21, 24, 33, 49, 50, 56, 57], "2021": [1, 6, 43, 44], "2022": [13, 34], "2023": [1, 5, 30, 59, 61, 64], "2024": [40, 42], "21": 59, "22": [41, 50], "2384068": 4, "24": [1, 61], "25": 47, "27": [42, 50], "28": 50, "29": 50, "2nd": 50, "2nd_person": 50, "2nd_person_start": 50, "3": [0, 1, 2, 21, 34, 41, 42, 51, 59, 61, 67, 71], "30": 50, "3000": 33, "32": [34, 50], "3432929": [21, 24], "35": 51, "36": 50, "38": 50, "39": 49, "39512260": 68, "3n": 59, "4": [0, 1, 5, 13, 21, 30, 33, 41, 42, 56, 61, 62, 71], "4274": 6, "43": 50, "45": 50, "47": 50, "49": 50, "4pit4bqz6": 5, "4th": [21, 33], "5": [1, 5, 21, 30, 33, 37, 41, 59], "50": [1, 47], "52": 50, "53": 50, "57": 50, "58": 50, "5th": 33, "6": [1, 33, 43], "60": 51, "63": 50, "6365": 21, "64": 67, "68": 47, "6th": 33, "7": [30, 33, 48], "70": 50, "78": [35, 50], "7th": 33, "8": [1, 30, 33, 42, 67], "80": [21, 70], "82": 41, "85": 34, "86": 35, "87": 50, "89": [45, 46], "8th": 33, "9": [2, 5, 21, 30, 33, 40, 47, 50, 67], "9123": 47, "92": 51, "93chall_readability_formula": [21, 70], "94": 15, "95": 47, "95450": 42, "97": 51, "9855072464": 47, "9992": 47, "99954": 47, "9th": 33, "A": [1, 2, 4, 12, 13, 14, 15, 16, 17, 18, 19, 21, 23, 25, 28, 33, 34, 35, 37, 38, 40, 41, 44, 45, 46, 47, 49, 50, 51, 52, 57, 59, 60, 61, 62, 64, 66, 67, 68, 70, 71, 72, 73], "And": [1, 62], "As": [1, 31, 35, 36, 40, 42, 45, 61], "Be": 1, "But": [1, 
50, 62], "By": [0, 1, 11, 42, 50], "For": [0, 1, 31, 34, 37, 41, 42, 43, 47, 49, 54, 56, 59, 62, 65], "If": [0, 1, 2, 5, 21, 29, 30, 35, 42, 45, 47, 50, 55, 61, 62, 63, 65, 66, 67, 71], "In": [1, 21, 30, 31, 34, 35, 36, 37, 39, 41, 42, 45, 46, 47, 50, 55, 59, 61, 62], "It": [1, 2, 31, 32, 33, 36, 37, 41, 44, 45, 46, 50, 64, 65, 66, 67, 71], "NO": 37, "NOT": [1, 61], "No": [19, 50, 53], "Not": 41, "One": [1, 37, 61], "That": [29, 55], "The": [1, 2, 3, 4, 5, 7, 9, 10, 11, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 28, 30, 31, 32, 33, 34, 35, 36, 37, 38, 40, 41, 42, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 58, 59, 60, 63, 64, 65, 66, 67, 68, 69, 71, 72, 73], "Then": [1, 55, 61], "There": [1, 11, 32, 61, 66], "These": [1, 11, 17, 32, 34, 42, 48, 52, 61, 62, 69], "To": [0, 1, 29, 31, 34, 37, 40, 42, 55, 56, 57, 61, 62], "WITH": 21, "Will": 50, "_deviat": 55, "_lexical_wordcount_custom": 42, "_preprocessed_": 0, "abbrevi": 67, "abil": [13, 29], "abl": [31, 36, 61], "abort": 1, "about": [1, 12, 29, 31, 36, 41, 47, 61, 62], "abov": [1, 21, 34, 61], "abstract_id": 4, "academ": 42, "accept": [0, 1, 58, 61], "access": [0, 1, 11, 15, 61], "accommod": [28, 32, 39, 45, 46, 64], "accord": [21, 37, 59, 64, 70], "accordingli": 63, "account": [1, 29, 32, 42], "accus": 50, "achiev": [50, 62], "acknowledg": 49, "acm": [21, 24], "acommod": 36, "across": [1, 13, 28, 31, 34, 40, 41, 50, 62, 64, 73], "action": 59, "activ": [1, 9, 44, 55, 71], "actual": [41, 56], "ad": [61, 62, 71], "adapt": 59, "add": [0, 1, 2, 21, 51, 61], "addit": [2, 32, 34, 42, 63, 69], "addition": [0, 30, 31, 32, 54], "address": 1, "adjac": 71, "adjust": [0, 21, 37, 63], "advanc": [31, 36], "advantag": 4, "adverb": [19, 31, 36], "adverb_limit": [19, 49], "affect": [0, 1, 29, 35, 44], "affirm": 49, "after": [0, 1, 31, 34, 36, 42, 43, 61, 62, 64, 67], "again": [32, 34, 67], "against": [28, 31, 36, 52, 67], "agarw": 62, "aggreg": [0, 2, 3, 37, 44, 61, 62, 65, 66, 72], "agre": 47, "agreement": 49, "ah": [31, 
36], "ai": 62, "aim": [39, 62], "airtim": [37, 62], "al": [1, 5, 16, 18, 21, 24, 30, 31, 32, 33, 34, 35, 36, 38, 42, 43, 44, 49, 50, 52, 53, 54, 56, 57, 58, 59, 60, 64], "algorithm": [56, 57], "align": [35, 51], "all": [0, 1, 2, 6, 11, 12, 13, 15, 19, 22, 28, 30, 31, 34, 35, 36, 37, 40, 41, 42, 46, 48, 49, 51, 52, 55, 58, 61, 62, 64, 65, 66, 71, 73], "allow": [0, 1, 67], "almaatouq": 59, "alon": 67, "along": 1, "alongsid": 1, "alphabet": 49, "alphanumer": [42, 67, 71], "alreadi": [0, 1, 2, 4, 10, 12, 16, 67], "also": [0, 1, 2, 28, 30, 31, 32, 34, 36, 37, 38, 42, 47, 51, 54, 60, 61, 62, 64, 65, 67, 69], "alsobai": 59, "altern": 59, "although": [1, 23, 31, 36], "alwai": [1, 55], "am": [31, 36, 42, 54, 62], "amaz": [48, 56], "ambient": 32, "american": 33, "ami": [47, 59, 62], "amic": 62, "among": [36, 37, 52, 55, 62], "amongst": [6, 35, 48], "an": [0, 1, 2, 5, 8, 11, 12, 13, 21, 29, 30, 31, 32, 33, 34, 36, 38, 40, 41, 42, 45, 47, 48, 49, 50, 51, 52, 54, 59, 60, 61, 62, 63, 65, 66, 67, 68, 71], "analys": [1, 62], "analysi": [0, 1, 11, 52, 62, 67, 71], "analyt": 62, "analyz": [0, 2, 13, 14, 16, 17, 19, 20, 21, 22, 24, 28, 43, 52, 62, 67, 71], "analyze_first_pct": [0, 1, 2], "angri": 47, "ani": [0, 1, 29, 31, 33, 38, 54, 62, 71], "annot": [17, 50], "anoth": [30, 34, 36, 48], "answer": 29, "anybodi": [31, 36], "anyth": [1, 31, 36, 56], "anywher": [31, 36], "apartment": 42, "api": [2, 47], "api_refer": 24, "apolog": [17, 50], "apologi": 49, "appear": [0, 15, 28, 31, 37, 38, 42, 64, 67], "append": [1, 17, 42, 64, 65, 66, 67], "appli": [4, 13, 18, 62, 64, 69], "applic": [29, 71], "appreci": 50, "approach": [32, 38, 42, 45, 46, 49, 53, 64], "appropri": [1, 31, 69], "ar": [0, 1, 2, 3, 5, 9, 10, 11, 15, 17, 19, 21, 23, 28, 29, 30, 31, 32, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 45, 46, 47, 48, 49, 51, 54, 55, 56, 57, 58, 59, 61, 62, 63, 64, 65, 66, 67, 69, 71], "arcross": 34, "area": 62, "aren": [31, 36], "around": 2, "arous": 48, "arrai": [6, 8, 67, 68], "articl": [37, 50], 
"ask": [20, 47, 54], "ask_ag": 49, "aspect": [50, 62], "assign": [1, 31, 36, 38, 45, 46, 52, 59, 61, 63, 71], "assign_chunk_num": 69, "associ": [1, 4, 15, 21, 29, 30, 31, 32, 36, 40, 45, 46, 47, 48, 61], "assum": [0, 1, 2, 10, 12, 16, 23, 31, 41, 60, 61, 64, 67, 71], "assumpt": [1, 41, 61], "asterisk": 22, "attribut": [0, 1, 11, 34, 51, 52, 56, 62], "author": [5, 31, 36, 59], "auto": 2, "automat": [0, 1, 61, 69], "auxiliari": [31, 36], "avail": [0, 1, 61, 62, 63, 64, 67], "averag": [1, 11, 13, 28, 30, 33, 34, 35, 40, 41, 46, 52, 64, 65, 72], "avil": 62, "avoid": 30, "awar": 29, "awesom": 62, "b": [4, 34, 35, 42, 45, 46, 55, 62], "bachelor": 42, "back": 62, "bag": [32, 38, 42, 45, 46, 49, 53, 56, 57], "bare_command": [19, 49], "base": [0, 1, 2, 15, 18, 19, 31, 32, 34, 35, 36, 37, 40, 42, 51, 52, 53, 54, 55, 56, 57, 61, 62, 63, 64, 65, 66, 71], "basic": [10, 11, 12, 16, 61, 62], "basic_featur": 11, "batch": 67, "batch_num": 1, "batch_siz": 67, "bay": [56, 57], "bbevi": 18, "becaus": [1, 2, 12, 21, 31, 36, 40, 42, 56, 61], "becom": [44, 61, 62], "been": [1, 12, 16, 31, 36, 61], "befor": [0, 1, 2, 17, 31, 36, 45, 48], "beforehand": 64, "begin": [34, 42, 54, 58, 61, 62, 63], "behavior": [0, 1, 11, 62, 63], "being": [4, 13, 14, 16, 17, 20, 21, 24, 31, 32, 36, 43, 47, 51, 55, 56, 60], "belong": 1, "below": [1, 11, 21, 33, 36, 45, 48, 51, 61, 62, 67, 69], "ber": 54, "bert": [0, 1, 2, 31, 35, 36, 39, 46, 61, 64, 67], "bert_path": 67, "bert_sentiment_data": [1, 61, 64], "best": 29, "better": [31, 61], "between": [4, 6, 13, 21, 23, 24, 28, 30, 31, 34, 35, 36, 37, 40, 45, 46, 55, 58, 59, 62, 64, 65, 67], "betwen": 34, "beyond": 2, "big": 59, "binari": [10, 32, 38], "blame": 47, "blob": [1, 24, 61, 67], "block": [22, 32, 48, 59], "blog": 15, "bodi": 67, "bold": [22, 64], "bool": [2, 63, 65, 66, 67, 71], "boolean": 1, "bootstrap": 62, "both": [0, 1, 2, 42, 52, 54, 55, 59, 62], "bother": 50, "bottom": 59, "bought": 41, "bound": [29, 35, 36, 37, 52, 55], "boundari": [34, 35, 42], 
"boyd": [0, 42], "break": [22, 48, 64], "brief": 44, "bring": 0, "broader": 52, "broken": 59, "btw": 50, "bug": [1, 61], "build": [1, 7, 34, 45, 46, 62], "built": [1, 11, 42, 67], "built_spacy_n": 15, "bullet": [22, 48, 64], "bunch": 59, "burst": 58, "bursti": [1, 11, 39, 58, 61, 65], "by_the_wai": 49, "c": [12, 34, 35, 45, 46, 62], "cach": [0, 2, 51, 61], "calcul": [1, 2, 5, 11, 12, 16, 18, 21, 28, 33, 41, 48, 49, 50, 56, 57, 58, 60, 62, 63, 64, 65, 66, 67, 68, 72, 73], "calculate_chat_level_featur": [1, 61, 69], "calculate_conversation_level_featur": 69, "calculate_hedge_featur": 64, "calculate_id_scor": 13, "calculate_info_divers": 65, "calculate_named_ent": 15, "calculate_num_question_na": 20, "calculate_politeness_senti": 64, "calculate_politeness_v2": 64, "calculate_team_bursti": 65, "calculate_textblob_senti": 64, "calculate_user_level_featur": 69, "calculate_vector_word_mimicri": 64, "calculate_word_mimicri": 64, "call": [1, 2, 8, 11, 13, 61, 62, 64, 69], "can": [0, 1, 2, 11, 31, 32, 33, 34, 36, 37, 42, 43, 44, 47, 48, 49, 50, 52, 54, 60, 61, 62, 67, 69], "can_you": 49, "cannot": [1, 2, 31, 36, 45, 46, 49, 62], "cao": [21, 24, 33, 43, 44, 56, 57, 62], "cap": [22, 48, 64], "capit": [0, 2, 48], "captur": [29, 30, 32, 34, 35, 38, 41, 42, 55], "caract": 40, "cardiffnlp": [1, 61], "care": 1, "carefulli": 60, "carri": 31, "casa_token": 5, "case": [1, 13, 16, 28, 29, 30, 31, 36, 37, 41, 45, 46, 51, 55, 56, 59, 61], "casual": 43, "categori": [21, 32, 42, 45, 46, 49, 52, 67], "caus": [31, 32, 36, 59], "caveat": 42, "center": 62, "central": 34, "centroid": [34, 66], "certain": [5, 19, 30, 42, 45, 46, 49, 71], "certainli": 42, "certainti": [11, 38, 39, 42, 64, 67], "cfm": 4, "chall": [1, 21, 39, 64, 70], "chang": [0, 1, 34, 50, 61, 71], "charact": [1, 2, 3, 15, 19, 37, 42, 49, 62, 64, 65, 66, 67, 71], "characterist": [1, 62], "chat": [0, 1, 2, 4, 5, 6, 7, 8, 12, 13, 14, 16, 23, 25, 28, 29, 32, 35, 36, 41, 44, 45, 46, 49, 59, 61, 63, 64, 65, 66, 67, 68, 69, 71, 72, 
73], "chat_data": [2, 6, 7, 8, 26, 27, 28, 63, 64, 65, 66, 67, 71], "chat_df": 14, "chat_featur": [1, 42, 61, 65, 66], "chat_level_data": 72, "chat_level_featur": 2, "chatlevelfeaturescalcul": [1, 2, 17, 21, 61, 64, 69], "chats_data": 73, "check": [19, 44, 64, 67], "check_embed": [1, 61, 69], "chen": 62, "choic": 1, "choos": [1, 60], "chose": 1, "chronolog": 1, "chunk": [34, 59, 63], "chunk_num": 63, "circlelyt": 13, "citat": [21, 24], "cite": 50, "clarif": [16, 32, 64], "class": [1, 2, 31, 61, 62, 64, 65, 66], "classif": [21, 64], "classifi": [16, 21, 50, 56, 57], "classify_ntri": 16, "classify_text_dalechal": 21, "clean": [2, 17, 19, 67, 71], "clean_text": 19, "clear": 1, "close": [31, 42, 48, 62], "closer": [45, 46, 59], "clue": 62, "cmu": 12, "code": [6, 18, 29, 32, 51, 55, 61, 62, 68], "coeffici": [1, 4, 39, 62, 65, 68], "cognit": 62, "col": 2, "colab": [0, 1], "collabor": [59, 62], "collaps": 2, "collect": [1, 2, 34, 49, 50, 52, 61, 62], "colleg": 33, "column": [0, 2, 4, 6, 7, 8, 9, 12, 13, 14, 16, 18, 23, 25, 28, 42, 51, 56, 62, 63, 64, 65, 66, 67, 68, 71, 72, 73], "column_count_frequ": 28, "column_count_mim": 28, "column_mimc": 28, "column_nam": 71, "column_to_summar": 72, "com": [1, 2, 4, 5, 13, 15, 18, 64, 67, 68, 71], "comb": 62, "combin": [0, 1, 6, 28, 64, 71], "come": [1, 12, 13, 21, 32, 33, 42, 58, 61], "comm": [1, 61], "command": [1, 61], "comment": 48, "commit": 23, "commit_data": 19, "common": [0, 32, 62, 64], "commonli": 37, "commun": [0, 1, 11, 42, 44, 48, 55, 60, 62, 64], "companion": 1, "compar": [31, 35, 44, 45, 52, 64, 71, 73], "compat": [0, 1, 61], "complement": [31, 36], "complet": [1, 2, 31, 55], "complex": [0, 35, 43, 50, 62], "compon": 50, "compos": 2, "comprehens": [33, 48], "compress": 71, "comput": [0, 1, 2, 4, 5, 6, 10, 11, 12, 13, 14, 28, 29, 30, 31, 34, 35, 36, 37, 40, 41, 42, 45, 46, 49, 50, 52, 55, 62, 64, 65, 66, 67, 69, 73], "compute_frequ": 28, "compute_frequency_per_conv": 28, "compute_vectors_from_preprocess": [0, 2], 
"computetf": 28, "conain": 61, "concat_bert_featur": [1, 61, 64], "concaten": [19, 49, 64, 71], "concentr": 55, "concept": [29, 39, 42, 62], "conceptu": [61, 62], "concis": 43, "concret": 29, "conduct": 1, "confid": [2, 5, 15, 30, 47, 64], "conflict": 62, "confound": 44, "congruent": 34, "conjection_seper": 19, "conjunct": [19, 31, 36, 49], "conjunction_start": 49, "connect": 39, "conscious": 35, "consecut": 22, "consequ": [0, 1], "consid": [1, 2, 33, 37], "consider": [61, 62], "consist": [31, 36, 40, 41], "constitut": 41, "constrain": [34, 35], "construct": [1, 11, 55, 62], "constructor": 47, "consult": 5, "contact": 0, "contain": [1, 2, 3, 4, 6, 7, 8, 9, 10, 12, 13, 14, 15, 16, 17, 18, 19, 25, 28, 29, 30, 35, 38, 42, 47, 49, 50, 55, 61, 62, 63, 64, 67, 71, 72, 73], "content": [0, 1, 12, 13, 28, 34, 36, 39, 41, 42, 45, 46, 62, 64, 67], "content_mimicry_scor": 28, "content_mimicry_score_per_conv": 28, "content_word_accommod": 31, "content_word_accommodation_per_conv": 31, "content_word_mimicri": 28, "contentcod": 67, "contentcodingdictionari": 67, "context": [2, 32, 42, 48, 62, 71], "continu": [56, 57], "contract": 49, "contrast": 39, "contribut": [13, 34, 37, 62], "control": 1, "conv": [1, 61], "conv_data": [2, 65], "conv_features_al": [1, 61], "conv_features_bas": [1, 11, 61], "conv_level_featur": 2, "conv_to_float_arr": 8, "convei": [6, 34, 52], "conveni": [1, 61], "convers": [0, 2, 3, 4, 6, 7, 8, 9, 12, 13, 23, 25, 28, 29, 31, 34, 35, 36, 37, 40, 41, 43, 44, 45, 46, 47, 48, 49, 50, 52, 55, 58, 59, 61, 63, 64, 65, 66, 68, 71, 72, 73], "conversation_id": [2, 28, 61, 71], "conversation_id_col": [0, 1, 2, 4, 6, 7, 8, 9, 13, 23, 25, 26, 27, 61, 63, 64, 65, 66, 68, 72, 73], "conversation_num": [0, 1, 2, 6, 7, 64, 66, 73], "conversationlevelfeaturescalcul": [2, 65, 69], "convert": [8, 41, 49, 67, 71], "convict": 5, "convo_aggreg": [0, 1, 2, 65], "convo_column": [0, 1, 2, 65], "convo_method": [0, 1, 2, 65], "convokit": [17, 50, 62, 64], "coordin": 55, "copi": [0, 1, 
42], "copular": [31, 36], "core": [34, 69], "cornel": 17, "corpu": [0, 1, 50], "corrado": 37, "correl": [41, 55], "correspond": [30, 34, 35, 40, 49, 55, 66], "cosin": [6, 7, 13, 28, 31, 34, 35, 36, 40, 45, 46, 65], "could": [1, 31, 33, 36, 50, 54], "could_you": 49, "couldn": [31, 36], "count": [0, 1, 3, 12, 14, 15, 16, 19, 21, 25, 28, 30, 31, 32, 36, 39, 41, 43, 44, 49, 52, 53, 54, 56, 58, 64, 65, 66], "count_all_cap": 22, "count_bullet_point": 22, "count_charact": 3, "count_difficult_word": 21, "count_ellips": 22, "count_emoji": 22, "count_emphasi": 22, "count_line_break": 22, "count_link": 22, "count_match": [19, 49], "count_messag": 3, "count_numb": 22, "count_parenthes": 22, "count_quot": 22, "count_responding_to_someon": 22, "count_spacy_match": 19, "count_syl": 21, "count_turn": 25, "count_turn_taking_index": 25, "count_user_refer": 22, "count_word": 3, "countabl": [1, 65], "countd": 36, "counterfactu": 50, "cours": [16, 31, 34, 36, 63], "cover": 28, "cpu": [0, 1, 67], "creat": [0, 1, 2, 13, 19, 31, 40, 42, 61, 62, 64, 65, 66, 71], "create_chunk": 63, "create_chunks_messag": 63, "create_cumulative_row": 71, "credit": 33, "criteria": 67, "crowd": 13, "csv": [1, 2, 61, 62, 67], "cuda": 67, "cumul": [2, 71], "cumulative_group": [0, 1, 2, 71], "current": [1, 11, 23, 31, 34, 35, 36, 40, 45, 46, 58, 61, 64, 71], "curt": 43, "custom": [0, 2, 11, 14, 62], "custom_featur": [0, 1, 2, 61], "custom_liwc_dictionari": [14, 64], "custom_liwc_dictionary_path": [0, 2, 42], "customiz": 62, "cut": 1, "cutoff": [2, 15, 47, 64], "d": [0, 1, 2, 31, 34, 36, 61], "dale": [1, 21, 39, 64, 70], "dale_chall_help": 21, "danescu": [49, 50], "dash": 22, "data": [0, 2, 6, 7, 8, 9, 13, 19, 20, 32, 37, 40, 41, 47, 51, 55, 61, 62, 63, 64, 65, 66, 67, 68, 71, 72, 73], "datafram": [0, 1, 2, 4, 6, 7, 8, 9, 12, 13, 14, 15, 16, 17, 18, 19, 23, 25, 28, 37, 42, 47, 49, 59, 61, 62, 63, 64, 65, 66, 67, 68, 71, 72, 73], "dataknowsal": 15, "dataset": [1, 2, 9, 12, 13, 28, 31, 41, 47, 52, 61, 64, 65, 66, 
73], "date": [1, 42, 61], "datetim": [2, 58], "dcosta": 62, "deal": [50, 59], "death": 1, "debat": 59, "decid": [1, 62], "decis": [1, 13, 62], "declar": [1, 62, 69], "deepli": 62, "default": [0, 1, 2, 5, 11, 13, 16, 23, 30, 34, 35, 42, 47, 62, 63, 64, 66, 67, 71, 73], "defer": [17, 50], "defin": [0, 11, 21, 31, 34, 36, 40, 59, 62, 64, 65, 66, 70], "definit": [1, 3, 44], "degre": [6, 30, 36, 45, 46, 55], "delet": 29, "deliber": 1, "demo": 61, "democrat": 1, "demonstr": 1, "demystifi": 62, "denomin": 59, "denot": 42, "densiti": 60, "dep_": 49, "dep_pair": 19, "depend": [0, 1, 10, 19, 32, 49, 52, 61, 63], "deriv": [2, 11, 65, 66], "descend": 67, "describ": [1, 11, 62], "descript": [1, 61], "design": [0, 1, 2, 13, 34, 62], "desir": [2, 63, 72], "detail": [0, 11, 33, 41, 43, 61, 62], "detect": [1, 32, 37, 38, 47, 48, 49, 54], "determin": [13, 18, 31, 35, 36, 40, 45, 46, 71], "dev": 24, "develop": [5, 37, 40, 62], "deviat": [4, 5, 29, 40, 41, 55, 58, 65, 72, 73], "devic": 67, "df": [4, 8, 9, 12, 13, 16, 18, 23, 28, 63, 71], "dic": [2, 14, 42, 67], "diccategori": 67, "dict": [2, 14, 17, 19, 28, 64, 67, 71], "dicterm": 67, "dictext": 67, "dictionari": [0, 1, 2, 14, 15, 17, 19, 28, 30, 42, 49, 61, 64, 67, 71], "did": [1, 31, 36, 37, 47, 50, 54, 62], "didn": [31, 36], "differ": [0, 1, 2, 4, 11, 12, 23, 28, 29, 31, 34, 36, 37, 39, 40, 44, 45, 46, 47, 49, 55, 62, 63, 64, 65, 66, 67, 71], "differenti": [49, 59], "difficult": [21, 33], "difficult_word": 21, "difficulti": 33, "dimens": [40, 62], "dimension": [34, 35], "dinner": 41, "direct": [34, 43, 45, 47, 50, 69], "direct_quest": [32, 50, 54], "direct_start": 50, "directli": [1, 62, 69], "directori": [0, 2, 19, 61, 65, 67], "disabl": 1, "disagr": 49, "disagre": 51, "discours": [31, 36], "discret": [31, 36, 45, 46], "discurs": [0, 1, 6, 8, 39, 40, 61, 65, 66], "discursive_divers": 11, "discus": 8, "discuss": [0, 1, 31, 34, 39, 40, 42, 43, 61, 62, 71], "dispers": 68, "displai": [1, 34, 42, 46, 61], "dispos": 1, "distanc": [34, 
35, 40], "distinct": [31, 36, 59], "distinguish": 59, "distribut": 31, "div": 16, "diverg": [6, 34, 35], "divers": [0, 1, 6, 8, 13, 39, 61, 65], "divid": [16, 34, 59, 63], "dl": [21, 24], "do": [0, 1, 29, 31, 34, 36, 37, 43, 49, 50, 54, 62, 69], "doc": [2, 19], "doc_top": 13, "document": [1, 17, 61, 69], "doe": [1, 2, 29, 40, 42, 43, 45, 47, 54, 61, 71], "doesn": [0, 1, 29, 31, 36, 42, 45, 61, 67], "doi": [5, 6, 21, 24, 64], "domain": [31, 50], "don": [31, 36, 49, 54, 62, 67], "done": [2, 50], "dot": 22, "doubl": 30, "down": [31, 36], "download": [1, 61], "download_resourc": [1, 61], "downstream": [17, 62], "dozen": 62, "drive": [62, 69], "driver": [2, 61, 64, 65, 66], "drop": [0, 2, 64], "due": [34, 59], "duncan": 62, "duplic": [1, 2, 71], "durat": [58, 63], "dure": [2, 55, 59, 62], "dynam": [59, 61], "e": [0, 1, 2, 4, 15, 20, 29, 30, 31, 32, 34, 35, 36, 37, 38, 41, 42, 47, 48, 49, 52, 54, 56, 59, 61, 63, 65, 66, 67, 71], "e2": [21, 70], "each": [0, 1, 2, 3, 4, 7, 8, 9, 11, 12, 15, 17, 19, 23, 25, 28, 30, 31, 34, 35, 36, 37, 40, 41, 42, 44, 45, 46, 47, 49, 50, 51, 52, 55, 59, 61, 62, 63, 64, 65, 66, 67, 68, 69, 71, 72, 73], "earlier": [0, 1, 2, 42], "easi": [1, 21, 62, 70], "easier": 21, "easili": 33, "easy_word": 21, "eat": 34, "echo": 31, "econom": 37, "edg": [29, 59], "edu": [1, 12, 16, 17, 70], "effect": [1, 41], "effici": 1, "effort": 55, "either": [2, 20, 52, 55, 67], "elaps": 58, "element": [1, 6, 67], "ellips": [22, 48, 64], "els": [1, 22, 47, 64], "embed": [8, 31, 34, 35, 36, 45, 46, 65, 66, 67, 69], "emili": [30, 35, 45, 46, 47, 59, 62], "emoji": [22, 42, 48, 64, 67, 71], "emot": [1, 61], "emoticon": 48, "emphas": [22, 48, 64], "emphasi": 48, "empirica": [1, 2, 71], "emploi": 45, "empti": [0, 2, 13, 64, 67], "en": [1, 21, 24, 61, 70], "en_core_web_sm": [1, 61], "enabl": 71, "enclos": 22, "encod": [1, 8], "encompass": 62, "encount": [1, 34, 35, 61], "encourag": 64, "end": [0, 1, 15, 20, 23, 34, 42, 54, 62, 63, 67], "engag": 43, "engin": 2, "english": [34, 
42], "enjoi": 62, "ensur": [0, 1, 40, 42, 49, 61, 63, 67], "entir": [0, 1, 12, 28, 31, 36, 40, 41, 52, 59, 62, 73], "entiti": [0, 2, 15, 39, 64], "entityrecogn": 47, "entri": [1, 28, 61], "ep8dauru1ogvjurwdbof5h6ayfbslvughjyiv31d_as6ppbt": 5, "equal": [1, 21, 34, 37, 40, 55, 59, 61, 62, 63], "equival": [0, 1, 41, 55, 61], "eric": 62, "error": [1, 16, 61, 71], "escap": 42, "especi": [41, 62], "essenti": 51, "establish": 31, "estim": 31, "et": [1, 5, 16, 18, 21, 24, 30, 31, 32, 33, 34, 35, 36, 38, 42, 43, 44, 49, 50, 52, 53, 54, 56, 57, 58, 59, 60, 64], "etc": [10, 15, 16, 17, 42], "evalu": [5, 47, 50], "evan": 62, "even": [0, 1, 2, 34, 37, 42, 62, 63, 67], "evenli": [34, 55], "event": [1, 34, 55, 61], "ever": 62, "everi": [1, 4, 13, 31, 34, 35, 36, 61, 62], "everybodi": [31, 36], "everyon": [31, 36, 47, 62], "everyth": [31, 36, 56], "everywher": [31, 36], "evolut": 35, "evolv": [35, 71], "exactli": [1, 2, 71], "examin": [40, 62, 63], "exampl": [0, 10, 11, 15, 21, 24, 29, 31, 32, 34, 37, 42, 43, 48, 50, 51, 54, 56, 59, 60, 61, 62, 67], "example_data": 1, "exce": 15, "except": [42, 67, 71], "exchang": [12, 35, 39, 40, 45, 55, 64], "exclud": [0, 41, 42], "exclus": [41, 42], "excus": 32, "exhibit": 35, "exist": [0, 1, 2, 55, 61, 62, 63, 64, 67], "expand": 49, "expect": [1, 37, 42, 47], "expected_valu": 47, "explain": [0, 29], "explan": [29, 43], "explor": [61, 62], "express": [5, 14, 30, 31, 32, 36, 38, 42, 64, 67], "extend": 1, "extens": [43, 44], "extent": [1, 4, 7, 12, 31, 34, 35, 37, 51, 55, 59, 61], "extern": 48, "extra": 51, "extract": [1, 17, 19, 28, 40, 50, 64], "extrem": [55, 56, 57], "face": [1, 51, 61], "facilit": [62, 71], "fact": [4, 35, 50, 54, 59], "factual": [17, 24, 50], "fail": [1, 61], "fals": [0, 1, 2, 31, 54, 61, 67, 71], "famili": 42, "far": [34, 35, 46, 50, 62], "faster": 14, "feat_count": 19, "featur": [0, 2, 3, 4, 5, 6, 7, 8, 9, 10, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 63, 64, 65, 66, 67], "feature_build": [0, 1, 
42, 61, 64], "feature_dict": [1, 61], "feature_method": [64, 65], "feature_nam": [1, 61], "featurebuild": [0, 2, 11, 42, 47, 69], "features_conceptu": [1, 61], "feauturebuild": 1, "few": [48, 62], "fewer": [12, 60], "fflow": 11, "field": [13, 17], "file": [0, 2, 12, 14, 19, 42, 61, 65, 67], "filenam": [1, 2, 19], "filenotfounderror": 67, "filler": [37, 60], "filler_paus": 49, "filter": [19, 62], "final": [1, 2, 34, 42, 62], "find": [1, 19, 28, 50], "fingertip": 62, "finit": 55, "first": [0, 2, 11, 12, 16, 19, 31, 34, 35, 36, 39, 40, 41, 42, 45, 46, 49, 52, 54, 59, 61, 62, 64, 67, 70, 71], "first_person": 12, "first_person_plur": 49, "first_person_raw": [12, 16], "first_person_singl": 49, "five": 37, "fix": [52, 67], "fix_abbrevi": 67, "flag": 71, "float": [0, 2, 4, 5, 6, 8, 10, 13, 14, 16, 21, 24, 25, 28, 68], "floor": 59, "flow": [0, 1, 7, 31, 36, 39, 41, 45, 46, 61, 64], "focal": [31, 36], "focu": 41, "folder": [0, 1, 19], "follow": [0, 1, 2, 11, 16, 17, 29, 31, 32, 33, 41, 42, 47, 49, 50, 53, 55, 59, 60, 61, 64, 65, 67], "for_m": 49, "for_you": 49, "forc": [0, 1, 61], "form": 1, "formal": [1, 61], "formal_titl": 49, "format": [0, 1, 8, 17, 22, 42, 47, 48, 61, 62, 64, 67], "former": [45, 46], "formula": [33, 42, 59, 64, 70], "fornt": 1, "forward": [0, 1, 7, 39, 41, 61, 64], "forward_flow": 35, "found": [1, 2, 5, 28, 30, 33, 61, 69], "four": [1, 8], "fourth": 33, "frac": 55, "fraction": 59, "framework": [49, 50, 62], "frequenc": [28, 31, 44, 64], "frequency_dict": 28, "fridai": 34, "from": [0, 1, 2, 4, 5, 6, 8, 9, 10, 11, 12, 13, 14, 16, 19, 21, 28, 29, 31, 32, 33, 34, 35, 36, 39, 41, 42, 49, 50, 51, 53, 55, 56, 57, 58, 61, 62, 64, 65, 66, 67, 71], "full": [1, 28, 37], "full_empirical_dataset": 1, "fulli": [32, 48], "functinon": 12, "function": [1, 2, 3, 4, 10, 11, 12, 13, 14, 16, 20, 21, 28, 31, 39, 44, 45, 46, 50, 56, 57, 60, 61, 62, 63, 64, 65, 66, 67, 69, 70, 71, 72, 73], "function_mimic_word": 28, "function_mimicry_scor": 28, "function_word_mimicri": 28, 
"function_word_refer": 28, "fund": 62, "further": [1, 61, 71], "furthermor": 42, "futur": [23, 66], "g": [0, 1, 2, 4, 15, 20, 29, 31, 32, 36, 37, 38, 41, 42, 47, 48, 52, 54, 59, 61, 63, 65, 66, 67, 71], "game": [1, 2, 59, 71], "gaug": [33, 52], "gener": [0, 2, 9, 11, 12, 16, 21, 31, 34, 35, 36, 40, 42, 45, 46, 49, 51, 59, 65, 66, 67, 69, 71, 72], "generaliz": 23, "generate_bert": 67, "generate_certainty_pkl": 67, "generate_lexicon_pkl": 67, "generate_vect": 67, "gensim": 40, "get": [16, 20, 21, 28, 30, 31, 36, 49, 66, 67], "get_all_dd_featur": 11, "get_centroid": 66, "get_certainti": 5, "get_certainty_scor": 64, "get_content_words_in_messag": 28, "get_conversation_level_aggreg": 65, "get_cosine_similar": 6, "get_dale_chall_easy_word": [21, 70], "get_dale_chall_score_and_classf": 64, "get_dd": 6, "get_dd_featur": 8, "get_dep_pair": [19, 49], "get_dep_pairs_noneg": [19, 49], "get_discursive_diversity_featur": 65, "get_first_pct_of_chat": 2, "get_first_person_word": [12, 70], "get_forward_flow": [7, 64], "get_function_word": 70, "get_function_words_in_messag": 28, "get_gini": 68, "get_gini_featur": 65, "get_info_divers": 13, "get_info_exchange_wordcount": 12, "get_liwc_count": 14, "get_max": 72, "get_mean": 72, "get_median": 72, "get_mimicry_bert": 28, "get_min": 72, "get_moving_mimicri": 28, "get_named_ent": 64, "get_nan_vector": [27, 67], "get_polarity_scor": 24, "get_politeness_strategi": 17, "get_politeness_v2": 18, "get_proportion_first_pronoun": 16, "get_question_word": 70, "get_reddit_featur": 64, "get_senti": 67, "get_stdev": 72, "get_subjectivity_scor": 24, "get_sum": 72, "get_team_bursti": 4, "get_temporal_featur": [4, 64], "get_time_diff": 23, "get_time_diff_startend": 23, "get_turn": 25, "get_turn_id": 71, "get_turn_taking_featur": 65, "get_unique_pairwise_combo": 6, "get_user_level_aggreg": 65, "get_user_level_summary_statistics_featur": 66, "get_user_level_summed_featur": 66, "get_user_max_datafram": 72, "get_user_mean_datafram": 72, 
"get_user_median_datafram": 72, "get_user_min_datafram": 72, "get_user_network": [11, 66], "get_user_stdev_datafram": 72, "get_user_sum_datafram": 72, "get_variance_in_dd": 26, "get_within_person_disc_rang": 27, "get_word_ttr": 16, "get_zscore_across_all_chat": 73, "get_zscore_across_all_convers": 73, "gina": 62, "gini": [1, 39, 62, 65, 68], "gini_coeffici": [11, 69], "github": [0, 1, 2, 18, 67, 71], "give": [0, 1, 29, 37], "give_ag": 49, "given": [0, 1, 5, 6, 13, 14, 28, 30, 31, 33, 34, 35, 36, 40, 41, 55, 59, 66, 67, 71], "go": [1, 34, 35, 45, 46, 50, 62], "goal": 62, "goe": 67, "good": [50, 56, 62], "goodby": 49, "googl": [0, 1], "got": [31, 36], "gotta": [31, 36], "gpu": [0, 2, 67], "grade": 33, "grader": 21, "grai": 35, "grammat": 36, "granularli": 35, "grate": [42, 62], "gratitud": [17, 49, 50], "great": [47, 50, 51, 56, 59, 60, 62], "greater": 55, "greet": 50, "groceri": 41, "group": [0, 2, 4, 13, 29, 33, 34, 41, 52, 59, 62, 68, 71, 72], "grouping_kei": [0, 1, 2, 71], "gt": 22, "guess": 10, "gun": 1, "gy": 15, "gym": 34, "ha": [0, 1, 32, 34, 35, 37, 42, 43, 46, 52, 54, 55, 56, 59, 61, 62, 63, 67, 71], "had": [1, 31, 36, 54, 61], "hadn": [31, 36], "handl": [19, 29], "happen": [1, 2, 55, 62, 63], "happi": 42, "hardcod": 67, "harder": 21, "hashedg": [17, 50], "hasn": [31, 36], "hasneg": 50, "hasposit": 50, "hate": 31, "have": [0, 1, 10, 12, 16, 31, 34, 36, 37, 40, 41, 42, 45, 46, 50, 54, 59, 60, 61, 62, 71], "haven": [31, 36], "he": [1, 31, 36], "header": [18, 67], "hear": 32, "heart": [61, 62], "heat": 1, "heavi": 62, "hedg": [11, 30, 39, 49, 50, 64], "hei": [1, 35, 45, 46, 50], "helena": [47, 62], "hello": [0, 43, 49], "help": [0, 31, 34, 36, 43, 45, 46, 52, 58, 69], "helper": 67, "her": [30, 31, 36], "here": [1, 29, 31, 34, 41, 42, 47, 61, 62, 66], "herself": [31, 36], "hesit": [60, 64], "hi": [31, 35, 36, 43, 45, 46], "hierach": 71, "hierarch": 71, "high": [0, 1, 2, 61, 62, 71], "higher": [0, 1, 21, 31, 34, 36, 40, 41, 42, 44, 45, 46, 55, 60], "highest": 
71, "highlight": 1, "him": [31, 36], "himself": [31, 36], "hmm": [31, 36], "hoc": 62, "hold": 31, "hole": 62, "home": 42, "homework": 34, "homonym": 31, "hood": 1, "hope": 35, "host": [45, 46], "hour": 48, "how": [1, 5, 28, 29, 30, 31, 34, 35, 36, 39, 43, 45, 51, 52, 54, 56, 62], "howev": [0, 1, 3, 11, 35, 40, 42, 44, 54, 56, 61, 62], "howitwork": 1, "html": [1, 2, 15, 17, 24, 61], "http": [1, 2, 4, 5, 6, 12, 13, 15, 16, 17, 18, 21, 24, 41, 45, 46, 47, 61, 64, 67, 68, 70, 71], "hu": [1, 42, 62], "hug": [1, 51, 61], "huggingfac": 1, "huh": [31, 32, 36], "human": [37, 50, 62], "hyperlink": 48, "hyphen": [1, 42, 61, 67], "i": [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 16, 19, 20, 21, 22, 23, 24, 25, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 71, 73], "iby1": 5, "id": [2, 4, 7, 23, 28, 62, 64, 66, 68, 71, 72, 73], "idea": [12, 35, 40, 47, 51], "ident": [34, 35], "identif": 1, "identifi": [0, 1, 2, 4, 8, 9, 15, 23, 25, 30, 31, 41, 47, 50, 52, 61, 63, 64, 71, 72], "identiif": [13, 71], "ignor": [1, 32], "illustr": [1, 41, 48, 62], "imagin": 1, "immedi": [31, 35, 64], "impact": [1, 60], "impersonal_pronoun": 49, "implement": 64, "impli": 37, "import": [31, 32, 36, 44, 45, 62, 69], "incent": 13, "includ": [0, 1, 2, 10, 17, 22, 31, 32, 35, 36, 42, 45, 46, 51, 52, 56, 61, 62, 66, 71], "inclus": [13, 71], "incongru": [8, 34], "incorpor": [1, 42, 45, 46], "increas": [1, 62], "incredibli": 42, "increment": 71, "independ": 1, "index": [1, 2, 4, 13, 25, 37, 39, 55, 61, 65], "indic": [1, 2, 16, 21, 22, 30, 32, 34, 35, 36, 40, 41, 43, 44, 48, 49, 50, 52, 55, 60, 63, 71], "indirect": 50, "indirect_btw": 50, "indirect_greet": 50, "indirectli": 69, "individu": [0, 1, 5, 11, 31, 34, 37, 45, 50, 59, 60, 62, 72], "inequ": 37, "infer": [1, 51, 67], "influenc": 1, "info": [13, 18, 64], "info_divers": 13, "info_exchang": 64, "info_exchange_wordcount": [41, 64], 
"info_exchange_zscor": 11, "inform": [0, 6, 11, 12, 13, 24, 32, 34, 39, 48, 62, 64, 65], "informal_titl": 49, "information_divers": 11, "initi": [2, 62, 63, 64, 65, 66], "input": [0, 2, 4, 6, 12, 13, 14, 15, 16, 19, 20, 22, 28, 42, 50, 55, 60, 62, 63, 64, 65, 66, 67, 71, 72], "input_data": [25, 68, 72], "input_df": [1, 2, 42, 61, 71], "inquiri": [0, 30, 39, 52], "insid": 1, "insight": 1, "inspir": 15, "instal": [1, 61, 62], "instanc": [1, 22, 50, 59, 66], "instanti": 2, "insteac": 1, "instead": [1, 62], "instruct": [1, 61], "int": [2, 3, 10, 13, 15, 16, 19, 20, 22, 28, 63, 64, 67], "intact": 71, "integ": [0, 13, 40, 47], "intend": 59, "interact": [1, 11, 43, 44, 62, 69], "interconnect": 62, "interest": [1, 61, 62], "interfac": 62, "intermedi": [59, 64], "intern": 29, "interpret": [0, 1], "interrupt": 59, "interv": [58, 65], "introduc": [42, 62], "introduct": [11, 61], "invalid": 67, "invers": 64, "involv": [41, 62, 65], "io": [1, 24, 47, 61], "ipynb": [0, 1], "is_hedged_sentence_1": 10, "is_valid_term": 67, "isn": [1, 31, 36], "issu": [1, 31, 36, 37, 42, 61], "ital": 64, "italic": 22, "item": [0, 71], "its": [0, 15, 31, 35, 36, 40, 41, 47, 54, 55, 64, 69], "itself": [31, 36, 44], "jami": [0, 42], "john": 1, "jonson": 62, "journal": [5, 64], "json": [1, 61], "jurafski": 70, "juri": 1, "juries_df": 1, "jury_conversations_with_outcome_var": 1, "jury_feature_build": 1, "jury_output": 1, "jury_output_chat_level": [1, 61], "jury_output_turn_level": 1, "just": [1, 2, 31, 36, 46, 50, 59, 61, 62], "katharina": 34, "keep": [1, 71], "kei": [1, 2, 4, 19, 28, 30, 54, 61, 71], "keyword": [19, 49], "kind": [10, 62], "kitchen": 42, "knob": 0, "know": [1, 30], "knowledg": 29, "known": [1, 32, 61], "kumar": 62, "kw": 19, "l714": 67, "l81": 67, "lab": [1, 2, 62, 71], "label": [1, 15, 21, 51], "lack": [31, 38, 45, 46], "languag": [15, 31, 34, 42, 50, 62], "larg": [1, 31, 69], "larger": [0, 31, 61], "last": [1, 31], "late": 32, "later": [0, 1, 2, 42, 61], "latest": [1, 61], "latter": 
[31, 36], "lda": [13, 40], "learn": [1, 61, 62], "least": [10, 32, 42, 63, 67], "led": 62, "legal": 49, "lemmat": [13, 40], "len": 28, "length": [35, 39, 41, 42, 44, 67], "less": [1, 13, 32, 50, 52, 55, 62, 63], "let": [41, 49, 53], "let_me_know": 49, "letter": [49, 71], "level": [0, 1, 2, 3, 4, 6, 7, 8, 9, 12, 13, 14, 16, 23, 61, 64, 65, 66, 71, 72], "lexic": [1, 10, 12, 14, 16, 31, 32, 36, 42, 60, 62, 64], "lexical_featur": [14, 64], "lexical_features_v2": [10, 11], "lexicon": [0, 5, 10, 14, 30, 39, 50, 52, 67, 69], "lexicons_dict": 67, "librari": [34, 51, 56, 57], "lift": 62, "light": 61, "like": [1, 22, 31, 34, 36, 41, 50, 61, 62], "limiat": 32, "limit": [11, 32, 37, 42, 54], "line": [0, 1, 19, 22, 48, 61, 62, 64, 67], "linear": 64, "linguist": [0, 18, 19, 30, 39, 50, 52], "link": [22, 29, 48, 50, 64], "list": [1, 2, 6, 7, 10, 11, 12, 13, 15, 19, 20, 21, 22, 28, 31, 33, 36, 37, 42, 48, 49, 50, 53, 54, 61, 64, 65, 66, 67, 68, 70, 71], "liter": 42, "literatur": 62, "littl": 38, "littlehors": 1, "liu": [42, 52], "live": [1, 54], "liwc": [0, 2, 14, 30, 39, 51, 52, 56, 62, 64, 67], "liwc2015": 42, "liwc_featur": [10, 14], "liwc_test_output": 42, "lix": 34, "ll": [1, 31, 36, 61], "load": [2, 19, 67, 69], "load_custem_liwc_dict": 2, "load_liwc_dict": 67, "load_saved_data": 19, "load_to_dict": 19, "load_to_list": 19, "loc": 15, "local": [1, 42, 51, 61], "locat": [1, 62], "long": 4, "longer": [30, 41, 43, 48, 61, 62], "look": [2, 34, 61, 65, 66], "loos": 36, "lot": [31, 36], "loud": 60, "love": [31, 56], "low": [1, 2, 29, 55, 60, 71], "lower": [0, 1, 21, 31, 33, 36, 41, 44, 55, 60], "lowercas": [2, 13, 40, 48, 49, 71], "lowest": 71, "lpearl": 16, "lst": 6, "m": [0, 2, 23, 30, 31, 36], "made": [1, 23, 35, 59, 61, 62], "magnitud": 55, "mai": [1, 2, 11, 28, 31, 32, 35, 36, 37, 41, 42, 43, 44, 54, 61, 62, 71], "main": [1, 2, 5, 62, 64, 65, 66, 67], "make": [1, 5, 31, 34, 55, 56, 62, 66, 69, 71], "man": 62, "mani": [1, 4, 11, 32, 37, 41, 60, 62, 66], "manner": [55, 62], 
"manual": [1, 61], "map": [13, 34, 67], "mark": [19, 20, 22, 43, 54, 64, 71], "marker": [18, 32, 39, 42, 50, 51, 52, 54, 56], "marlow": 44, "matarazzo": 62, "match": [1, 5, 16, 19, 30, 67], "math": 34, "matter": [28, 47], "max": [0, 1, 2, 11, 66, 72], "max_num_chunk": 63, "max_user_mean_num_word": 1, "maxim": [34, 35, 37, 72], "maximum": [1, 63, 65, 72], "mayb": [38, 47], "mcfarland": 70, "me": [31, 32, 36, 41, 50, 53], "mean": [0, 1, 2, 4, 6, 11, 13, 21, 29, 31, 34, 36, 40, 41, 42, 47, 55, 56, 58, 61, 62, 65, 66, 72, 73], "mean_num_word": 1, "meaning": [31, 41, 55], "meaningless": 41, "meant": 39, "measur": [0, 1, 7, 12, 13, 20, 29, 30, 31, 32, 34, 35, 36, 37, 38, 39, 40, 41, 42, 44, 45, 46, 51, 52, 54, 55, 56, 57, 58, 59, 60, 62, 64, 68], "mechan": 32, "median": [0, 1, 72], "medium": 21, "meet": 48, "member": [13, 34, 37, 55], "merg": [2, 8, 65, 66], "merge_conv_data_with_origin": 2, "messag": [0, 1, 2, 3, 4, 5, 8, 11, 12, 13, 14, 15, 16, 17, 20, 21, 22, 23, 24, 28, 30, 31, 34, 35, 36, 37, 39, 41, 45, 46, 47, 48, 50, 51, 52, 55, 56, 57, 58, 61, 62, 63, 64, 65, 66, 67, 71, 73], "messaga": 61, "message_col": [0, 1, 2, 12, 13, 14, 61, 64, 65, 67, 71], "message_col_origin": 14, "message_embed": [6, 7, 8], "message_lower_with_punc": 71, "metadata": [0, 1], "method": [1, 5, 31, 41, 50, 62, 65], "metric": [0, 1, 8, 30, 34, 35, 46, 47, 48, 55, 66], "michael": 1, "mid": [1, 2, 71], "middl": [21, 34, 63], "might": [0, 1, 29, 43, 48, 53], "mikeyeoman": [18, 64], "mileston": 34, "millisecond": [0, 2], "mimic": [28, 31, 36, 45], "mimic_word": 28, "mimick": [28, 31, 64], "mimicri": [0, 1, 28, 31, 35, 36, 39, 61, 64], "mimicry_bert": [45, 46], "min": [1, 2, 11, 72], "mind": [1, 35, 50], "mine": [31, 36, 53, 59], "minim": [0, 41, 60], "minimum": [65, 72], "minmiz": 72, "minu": [12, 41, 64], "minut": [55, 58], "mirror": 1, "miss": [1, 32, 61], "mitig": [31, 36], "mizil": [49, 50], "mm": [31, 36], "mnsc": 6, "modal": 50, "mode": 60, "model": [1, 2, 13, 15, 31, 34, 35, 36, 40, 45, 
46, 47, 51, 62, 67], "model_bert": 67, "modif": 35, "modifi": [1, 9, 19, 32, 64], "modul": [0, 1, 11, 34, 49, 50, 61, 69], "monologu": 59, "more": [0, 1, 2, 11, 12, 22, 23, 24, 28, 31, 32, 34, 36, 37, 40, 41, 42, 43, 44, 45, 46, 50, 52, 55, 59, 61, 62, 71], "morn": 1, "most": [1, 24, 31, 55, 62, 69], "motiv": 61, "move": [0, 1, 28, 31, 36, 39, 45, 59, 61], "movi": 31, "much": [1, 28, 31, 34, 35, 36, 45, 62], "multi": [1, 2, 71], "multidimension": [45, 46], "multipl": [0, 1, 2, 19, 62, 71], "must": [1, 6, 62, 71], "my": [30, 31, 35, 36, 45, 46, 50, 53], "my_chat_featur": 1, "my_feature_build": 61, "my_fil": 1, "my_output": 61, "my_output_chat_level": 61, "my_output_conv_level": 61, "my_output_user_level": 61, "my_pandas_datafram": 61, "myself": [31, 36, 53], "n": [0, 2, 35, 45, 46, 47, 57, 59, 60], "n_chat": 59, "na": [5, 33, 43, 44, 48, 49, 50, 53, 58], "naiv": [2, 20, 32, 34, 38, 39, 53, 56, 57, 64], "name": [0, 2, 4, 7, 8, 9, 12, 13, 14, 15, 17, 19, 23, 25, 28, 30, 32, 35, 39, 42, 45, 46, 50, 51, 56, 63, 64, 66, 67, 68, 71, 72, 73], "name_to_train": 47, "named_ent": [15, 47], "named_entity_recognition_featur": 11, "nan": [0, 34, 67], "nate": [35, 45, 46], "nathaniel": [35, 45, 46], "nativ": 50, "natur": [43, 55], "ndarrai": 68, "nearest": [13, 40], "nearli": 62, "necessari": [63, 67], "need": [0, 1, 2, 21, 62, 66, 67], "need_sent": 67, "need_senti": 67, "neg": [1, 24, 29, 31, 34, 35, 36, 42, 50, 51, 52, 54, 56, 61, 62, 67], "negat": [19, 49], "negative_bert": [0, 1, 51, 61], "negative_emot": [49, 51, 52, 56], "negoti": 62, "neighborhood": 54, "neither": 30, "ner": 15, "ner_cutoff": [0, 1, 2, 47, 64], "ner_train": 64, "ner_training_df": [0, 1, 2, 47], "nest": [0, 1, 2, 22, 71], "net": [45, 46], "network": 11, "neutral": [1, 5, 24, 30, 51, 55, 61, 67], "neutral_bert": [1, 51, 61], "never": 1, "new": [1, 4, 13, 34, 61, 64, 65, 66, 72], "new_column_nam": 72, "next": [1, 32, 47, 58], "nice": [1, 50, 54, 61], "nicknam": 1, "niculescu": [49, 50], "night": 31, "nikhil": 
[59, 62], "nltk": [1, 42, 61], "nobodi": [31, 36], "nois": 32, "non": [1, 2, 28, 31, 37, 42, 48, 61, 62, 67, 71], "none": [1, 2, 19, 37, 55, 61, 64, 65, 66, 67], "nor": 30, "normal": [19, 28, 31], "notabl": 62, "note": [0, 2, 12, 16, 20, 42, 61, 67, 71], "notebook": [0, 1], "noth": [31, 36, 56], "noun": 1, "novel": [45, 46], "now": [0, 1], "nowher": [31, 36], "np": [67, 68], "ntri": 32, "null": 34, "num": 48, "num_char": 65, "num_chunk": [27, 63], "num_hedge_word": 10, "num_messag": 65, "num_named_ent": [15, 47], "num_row": 63, "num_top": 13, "num_word": [12, 16, 65], "number": [0, 1, 3, 11, 12, 13, 14, 15, 16, 19, 20, 21, 22, 25, 28, 31, 32, 34, 36, 37, 40, 41, 42, 43, 44, 47, 48, 49, 54, 56, 58, 59, 60, 62, 63, 64, 66, 67, 69, 71, 72], "numer": [0, 1, 2, 11, 13, 33, 68, 72, 73], "numpi": [1, 61, 68], "o": 35, "object": [1, 2, 11, 19, 44, 50, 57, 58, 61, 62, 64, 65, 66], "obtain": [0, 1, 13, 17, 23, 24, 34, 42, 61], "occur": [0, 4, 31, 42, 71], "occurr": 19, "off": [0, 1, 31, 36], "offer": 0, "offici": [61, 67], "often": [28, 36, 47, 48, 62], "oh": [31, 36, 48], "okai": [31, 36], "older": [1, 49, 61], "on_column": [18, 23, 28, 68, 72, 73], "onc": [1, 2, 11, 58, 61, 62, 67], "one": [0, 1, 2, 4, 10, 12, 19, 23, 25, 28, 29, 31, 32, 36, 37, 47, 51, 56, 59, 61, 62, 67, 68, 71, 73], "ones": [31, 36], "onli": [0, 1, 2, 5, 11, 23, 29, 31, 32, 34, 36, 37, 45, 53, 58, 59, 61, 62, 67, 71], "onlin": [1, 32, 39, 64], "onward": 0, "open": [0, 62, 66], "operation": [39, 50, 59], "opinion": [24, 31], "oppos": [2, 31, 34, 35, 55], "opposit": 34, "option": [1, 2, 37, 62, 63, 67, 71], "order": [0, 1, 35, 37, 42, 67, 71], "org": [2, 6, 15, 21, 24, 41, 70], "organ": 1, "origin": [1, 2, 5, 12, 21, 31, 32, 35, 36, 37, 45, 46, 49, 50, 59], "orthogon": 34, "other": [0, 1, 2, 9, 11, 28, 29, 30, 31, 32, 34, 35, 36, 37, 39, 40, 45, 46, 48, 51, 52, 54, 56, 58, 59, 61, 62, 64, 66, 71], "other_lexical_featur": [11, 64], "otherwis": [2, 10, 21, 32, 38, 63, 67, 71], "our": [0, 1, 2, 11, 13, 29, 
31, 32, 36, 37, 39, 53, 59, 61, 71], "ourselv": 53, "out": [1, 16, 19, 31, 36, 42, 55, 60, 62], "outcom": [1, 44, 62], "output": [0, 2, 10, 17, 19, 40, 42, 61, 62, 64, 67], "output_file_bas": [0, 1, 2, 42, 61], "output_file_path_chat_level": [1, 2], "output_file_path_conv_level": [1, 2], "output_file_path_user_level": [1, 2], "output_path": 67, "outsid": [1, 2, 12], "over": [1, 16, 29, 31, 34, 35, 36, 37, 53, 55, 60, 62, 67, 71], "overal": [30, 31, 34, 36, 45, 46], "overrid": [0, 1, 2], "overview": [0, 61, 62], "overwhelmingli": 1, "overwritten": 1, "own": [0, 1, 2, 9, 35, 62, 64], "p": 55, "pacakg": 24, "pace": [43, 62], "packag": [17, 18, 40, 62], "pad": 19, "page": [1, 11, 29, 39, 61, 62, 69], "pair": [6, 19, 34, 49, 71], "pairwis": [6, 34], "panda": [0, 1, 2, 12, 14, 16, 23, 47, 64, 65, 66, 71, 72, 73], "paper": [4, 5, 12, 18, 29, 40, 49, 50, 64], "paragraph": 22, "paramet": [2, 3, 4, 5, 6, 7, 8, 9, 10, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 28, 47, 61, 62, 63, 64, 65, 66, 67, 68, 71, 72, 73], "paramt": 1, "pardon": 32, "parenthes": [22, 42, 48, 64], "parenthet": [22, 48], "pars": [2, 16, 50, 60], "parser": 67, "part": [1, 10, 13, 29, 36, 42, 52, 67, 71], "particip": [1, 9, 37, 62], "particl": [31, 36], "particular": [1, 11, 31, 32, 34, 41, 45, 47, 51, 59, 62], "particularli": 42, "partner": 32, "pass": [1, 13, 21, 47, 71], "path": [1, 2, 19, 61, 67], "path_in": 19, "pattern": [4, 11, 19, 42, 55, 62, 67], "paus": 4, "pd": [1, 2, 4, 6, 7, 8, 9, 12, 13, 14, 15, 16, 18, 19, 23, 25, 63, 64, 65, 66, 67, 68, 71], "pdf": [5, 12, 13, 16, 18, 21, 24, 64, 70], "penalti": 1, "pennebak": [0, 12, 37, 41, 42, 52], "pennyslvania": 62, "peopl": [1, 32, 59, 62], "per": [1, 6, 9, 19, 42, 63, 66, 72], "percentag": [2, 21], "perfect": [37, 59], "perform": [0, 1, 16, 50], "perhap": 1, "period": [4, 34, 55], "person": [1, 8, 12, 15, 16, 32, 34, 39, 41, 42, 50, 59, 62, 64, 70], "perspect": 1, "petrocelli": 5, "phrase": [19, 30, 38, 54], "phrase_split": 19, "pickl": 
[19, 67], "piec": [36, 42, 59, 63], "pl": 50, "place": [55, 61, 62], "plan": [34, 35, 45, 46], "player": 59, "pleas": [0, 1, 38, 49, 50, 61, 62], "please_start": 50, "point": [22, 24, 34, 35, 42, 45, 46, 48, 52, 64, 66], "poisson": 55, "polar": [24, 39, 51, 52, 64], "polit": [1, 17, 18, 30, 32, 38, 39, 42, 51, 52, 54, 56, 64], "politeness_featur": 11, "politeness_v2": 11, "politeness_v2_help": 11, "politenessstrategi": [17, 50], "portion": 0, "posit": [0, 1, 11, 15, 24, 29, 31, 39, 42, 50, 51, 54, 56, 61, 62, 64, 67], "positive_affect_lexical_per_100": [51, 52, 56], "positive_bert": [0, 1, 51, 61], "positive_emot": [49, 51, 52, 56], "positivity_bert": [1, 61], "positivity_zscor": 64, "positivity_zscore_chat": 52, "positivity_zscore_convers": 52, "possess": 31, "possibl": [1, 34, 62, 66], "possibli": [38, 62], "ppron": 67, "practic": [34, 35], "pre": [1, 4, 21, 37, 49, 64], "preced": [31, 35, 71], "precend": 35, "precis": 47, "precomput": 51, "predefin": 19, "predetermin": [31, 36], "predict": [2, 47, 51, 64], "prefer": [0, 1], "preload_word_list": 69, "prep_simpl": 19, "prep_whol": 19, "preposit": [31, 36], "preproces": 48, "preprocess": [0, 1, 2, 13, 19, 40, 43, 49, 51, 61, 69], "preprocess_chat_data": 2, "preprocess_conversation_column": 71, "preprocess_naive_turn": 71, "preprocess_text": 71, "preprocess_text_lowercase_but_retain_punctu": 71, "presenc": [2, 32, 67], "present": [1, 2, 14, 30, 31, 38, 42, 55, 62], "preserv": 42, "prespecifi": 19, "prevent": 51, "previou": [1, 7, 28, 31, 36, 45, 46, 58, 64, 71], "primari": 34, "print": [2, 71], "prior": [2, 64, 71], "priya": [47, 62], "probabl": [15, 47], "problem": 62, "procedur": 62, "proceed": 46, "process": [0, 1, 2, 4, 10, 21, 37, 42, 55, 62, 64, 65, 67, 69, 71], "prodi": 15, "produc": [1, 2, 34], "product": 15, "professor": 62, "progress": [1, 2], "project": [54, 62], "pronoun": [12, 16, 31, 36, 39, 41, 42, 64, 67, 70], "proper": 1, "properli": 42, "properti": [1, 11, 61], "proport": [16, 39, 42, 64], 
"propos": 37, "provid": [0, 1, 2, 15, 29, 30, 33, 36, 39, 44, 47, 54, 62], "proxi": 42, "pseudonym": 1, "psycholog": 42, "pub": 70, "publish": [5, 30, 64], "pubsonlin": 6, "punctuat": [0, 2, 16, 19, 20, 21, 28, 43, 54, 60, 67, 71], "punctuation_seper": 19, "puncut": 48, "pure": [24, 36], "purpos": 1, "put": [34, 42, 50, 62, 66], "py": [0, 1, 14, 49, 61, 64, 67], "pydata": 2, "pypi": [1, 61], "python": [1, 32, 41, 56, 57, 61, 62, 68], "qtd": 62, "qualiti": 41, "quantifi": [31, 36, 62], "quantiti": [37, 39, 41, 47], "quartil": 50, "question": [16, 19, 20, 29, 32, 39, 49, 50, 64, 66, 68, 70], "question_num": 11, "question_word": 20, "quick": [1, 43], "quickli": 0, "quit": 40, "quot": [22, 48, 64], "quotat": [22, 48], "rabbit": 62, "rain": 41, "rais": [2, 67, 71], "random": 55, "rang": [5, 8, 24, 30, 33, 34, 35, 40, 51, 53, 55, 56, 57], "ranganath": [16, 31, 32, 36, 38, 43, 54, 70], "ranganath2013": 70, "ranganathetal2013_detectingflirt": 16, "rapid": [1, 4], "rare": [34, 35], "rate": [42, 51], "rather": [1, 31, 34, 35, 36, 37, 45, 46, 63], "ratio": [16, 39, 64], "raw": [0, 12, 16, 21, 31, 33, 42, 50, 64], "re": [1, 31, 36, 42, 50, 61], "reach": 42, "read": [0, 1, 2, 16, 21, 29, 33, 61, 62, 64, 65, 66, 67], "read_csv": 1, "read_in_lexicon": 67, "readabl": [11, 33, 64, 70], "reader": 33, "readi": 1, "readili": 62, "readthedoc": [1, 24, 61], "real": [1, 55], "realit": 13, "realli": [31, 36, 50], "reason": [31, 36, 45, 46, 49], "reassur": 49, "recal": 47, "recent": [0, 50], "recept": [18, 32, 39, 42, 50, 51, 52, 54, 56, 62, 64], "recogn": [1, 42, 43, 47], "recognit": [0, 2, 39, 64], "recommend": [0, 42, 62], "reddit": [48, 64], "reddit_tag": 11, "redditus": 48, "reduc": 63, "reduce_chunk": 63, "redund": [42, 62], "refer": [0, 1, 2, 11, 22, 24, 28, 31, 42, 48, 52, 61, 62, 64, 70], "reflect": [37, 43], "regardless": 1, "regener": [0, 2, 51, 67], "regenerate_vector": [0, 1, 2, 67], "regex": [14, 16, 42, 49, 67], "regist": 37, "regress": 1, "regular": [5, 14, 30, 32, 42, 55, 
58, 67], "reichel": [53, 58, 60], "reidl": [4, 13], "reinvent": 62, "rel": [41, 51, 52, 55, 60, 64], "relat": [1, 61, 62, 64], "relationship": 36, "relev": [1, 29, 42, 44, 49, 51, 56, 61, 64, 65], "reli": [31, 34, 35, 36, 69], "reliabl": [33, 42], "remain": [1, 30, 71], "rememb": 1, "remov": [0, 2, 9, 13, 19, 28, 40, 43, 48, 49, 50, 71], "remove_active_us": 9, "remove_unhashable_col": 71, "renam": 1, "repair": [16, 39], "repeat": [60, 71], "repetit": 60, "replac": 19, "report": [1, 61], "repres": [2, 4, 6, 7, 11, 13, 23, 31, 34, 36, 42, 45, 46, 64, 66, 67, 68, 71, 72, 73], "represent": [34, 38, 67], "reproduc": [36, 62], "republican": 1, "request": [32, 50, 51], "requir": [0, 1, 20, 21, 31, 55, 61, 62, 64, 65, 66, 67, 71], "research": [1, 62], "reserv": 0, "resolv": 62, "resourc": [1, 39, 48, 61, 62], "respect": [1, 2, 12, 31, 36, 37, 69], "respons": [22, 48, 55, 58, 64], "restaur": [34, 56], "restor": 0, "restrict": 71, "result": [40, 55, 65, 72], "retain": [2, 16, 20, 21, 60, 71], "retriev": 50, "retunr": 3, "return": [2, 3, 4, 5, 6, 7, 8, 9, 10, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 28, 30, 32, 43, 49, 50, 51, 55, 62, 63, 64, 65, 66, 67, 68, 70, 71, 72, 73], "reveal": 62, "revert": 50, "review": 62, "rewrit": 50, "rich": 62, "riedl": [13, 40, 55], "right": [31, 36, 61, 62], "roberta": [0, 1, 39, 42, 52, 56, 61, 64, 67], "robust": 13, "rocklag": [5, 30, 64], "room": 59, "root": [13, 40], "rough": [12, 54], "roughli": 31, "round": [13, 40, 59, 71], "round_num": 1, "row": [0, 1, 2, 9, 13, 25, 37, 40, 59, 63, 68, 71, 72, 73], "rowbotham": 62, "rucker": 5, "rule": [1, 69], "run": [0, 10, 12, 16, 35, 46, 47, 48, 51, 61, 69], "runtim": [1, 35], "ryan": [0, 42], "ryanboyd": 67, "sagepub": [5, 64], "sai": [1, 32, 50, 59], "said": [1, 36, 62], "same": [0, 1, 2, 31, 34, 37, 45, 48, 52, 59, 60, 62, 71], "sampl": [61, 62], "sarcast": 48, "save": [0, 1, 2, 19, 64, 67], "save_featur": 2, "sbert": [0, 1, 28, 31, 34, 35, 36, 45, 46, 64, 65, 67], "scale": [42, 
51], "schema": 1, "scheme": 0, "school": [21, 62], "scienc": [29, 39, 62], "scientist": [61, 62], "score": [1, 4, 5, 11, 12, 13, 15, 21, 24, 28, 29, 30, 31, 34, 35, 36, 38, 39, 40, 45, 46, 47, 50, 51, 53, 56, 57, 61, 64, 65, 67, 73], "script": [1, 61], "sea": 1, "seamless": 62, "search": [19, 61], "second": [0, 1, 4, 34, 42, 58, 59], "second_person": 49, "secr": [18, 49, 50, 64], "section": [1, 29, 61], "see": [0, 1, 2, 11, 30, 34, 38, 41, 45, 46, 47, 55, 62, 71], "seek": [5, 62], "seen": 67, "segment": [0, 19], "select": [2, 4, 23, 28, 36, 45, 64, 66, 67, 68, 71, 72, 73], "self": [1, 2, 61], "semant": [31, 34, 35, 41], "semantic_group": [1, 61], "send": [1, 37, 55], "sens": [1, 5, 31, 54, 66], "sensibl": 1, "sent": [1, 37, 64], "sentenc": [0, 1, 10, 15, 19, 20, 21, 33, 34, 35, 36, 42, 45, 46, 47, 48, 54, 56, 61, 67], "sentence_pad": 19, "sentence_split": 19, "sentence_to_train": 47, "sentencis": 19, "sentiment": [0, 1, 24, 31, 39, 42, 52, 56, 61, 62, 64, 67], "sentimet": 1, "separ": [1, 2, 19, 34, 42, 51, 67], "sepcifi": 1, "septemb": 40, "sequenc": [1, 59], "sequenti": 1, "seri": [12, 16, 23, 28, 42, 71, 73], "serv": 12, "set": [0, 1, 2, 13, 34, 48, 59], "set_self_conv_data": 2, "sever": [1, 30, 41, 42, 48, 51, 56, 61], "shall": 54, "share": [31, 36, 37], "she": [30, 31, 36], "shift": 34, "shop": 62, "short": [55, 58], "shorter": [13, 40, 41, 42, 43], "should": [0, 1, 2, 4, 14, 23, 28, 29, 31, 36, 47, 48, 54, 61, 62, 64, 66, 67, 68, 69, 71, 72, 73], "shouldn": [31, 36], "show": [1, 37, 61], "showeth": 62, "shruti": [35, 45, 46, 47, 62], "side": 31, "signal": [45, 55], "signifi": 42, "signific": [1, 61], "silent": 37, "similar": [1, 6, 7, 13, 28, 29, 31, 34, 35, 36, 40, 45, 46, 49, 50, 62, 65], "similarli": [1, 35], "simpl": [0, 1, 16, 19, 42, 61, 62], "simpli": [1, 5, 11, 28, 42, 56, 62], "simplifi": 1, "simplist": 41, "sinc": [1, 32, 41, 71], "singh": 62, "singl": [0, 1, 2, 11, 12, 19, 23, 31, 34, 35, 36, 37, 41, 45, 46, 59, 62, 71, 72], "singular": [12, 41, 
64], "site": 16, "situat": 37, "size": [1, 13, 63, 67], "skip": 1, "slightli": [32, 62, 63], "slow": 1, "small": 40, "so": [1, 2, 10, 30, 31, 36, 37, 42, 50, 61, 62, 66, 67], "social": [29, 39, 61, 62], "socsci": 16, "softwar": 62, "sohi": 62, "sol3": 4, "solut": [1, 59], "solv": 62, "some": [0, 1, 11, 17, 29, 32, 34, 35, 37, 41, 61, 63], "somebodi": [31, 36], "someon": [22, 29, 31, 36, 47, 48, 61, 64], "someplac": [31, 36], "someth": 47, "sometim": 1, "somewhat": 35, "soon": 62, "sorri": [16, 32, 50], "sort": [10, 42, 67], "sort_word": 67, "sound": [47, 51], "sourc": [4, 5, 6, 12, 13, 16, 17, 21, 34, 35, 50, 64, 68], "space": [34, 40, 42, 67, 71], "spaci": [1, 19, 47, 49, 50, 61], "span": 63, "spars": 32, "speak": [1, 31, 36, 37, 59, 60, 62], "speaker": [0, 1, 2, 6, 8, 9, 25, 31, 34, 35, 37, 38, 42, 45, 46, 61, 66, 71, 72], "speaker_id": [2, 61, 72], "speaker_id_col": [0, 1, 2, 6, 8, 9, 25, 26, 27, 61, 65, 66, 71, 72], "speaker_nicknam": [0, 1, 2, 6, 9, 59, 66], "special": [0, 1, 2, 48, 71], "specif": [1, 2, 12, 32, 41, 48, 55, 61, 62, 69, 71], "specifi": [1, 2, 19, 47, 49, 65, 66, 67, 68, 71, 72, 73], "speciifc": 63, "speed": 1, "spend": [51, 62], "spike": 55, "split": [19, 21, 43, 63], "spoke": 59, "spoken": [11, 37], "spread": 55, "squar": [13, 40], "src": 67, "ssrn": 4, "stabl": 40, "stack": 14, "stackoverflow": 68, "stage": [1, 2, 34, 71], "stamp": 55, "standard": [1, 4, 37, 40, 41, 42, 49, 55, 58, 60, 65, 72, 73], "stanford": 70, "start": [15, 19, 20, 22, 23, 50], "statement": [1, 38, 42, 47, 48, 61, 62, 64], "statist": [1, 65, 66, 68], "statologi": 41, "stdev": [1, 2, 11, 65, 66], "stem": 42, "step": [1, 4, 28, 41, 45, 46, 51], "still": [1, 41, 45, 46], "stochast": 40, "stop": [40, 62], "stopword": [13, 19], "store": [1, 12, 16, 41, 49, 51, 61, 65, 67], "stoword": 42, "str": [2, 3, 4, 5, 7, 8, 9, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 28, 63, 64, 65, 66, 67, 68, 71, 72, 73], "str_to_vec": 67, "str_vec": 67, "straightforward": 29, 
"strategi": [17, 30, 32, 38, 39, 42, 49, 54, 64], "stream": 35, "strictli": 1, "string": [0, 1, 2, 4, 8, 12, 13, 14, 19, 23, 24, 50, 64, 66, 67, 68, 71, 72, 73], "strongli": [1, 41, 61], "structur": [0, 36, 49], "student": [21, 33], "studi": [1, 34, 62], "style": [1, 31, 36, 59], "sub": [0, 1, 71], "subfold": 1, "subject": [5, 24, 28, 39, 49, 64], "subjunct": 50, "sublist": 28, "submiss": 55, "subpart": [1, 71], "subsequ": [1, 30, 51, 58], "subset": 62, "substanc": 36, "substant": 31, "substanti": 1, "substr": 30, "subtask": 1, "subtract": [41, 58], "succe": 62, "success": [0, 1, 4, 31, 36, 43, 55, 58], "suggest": [1, 13, 34, 42, 44, 50], "suit": [62, 64], "suitabl": 2, "sum": [1, 28, 34, 61, 64, 65, 66, 72], "summar": [0, 1, 69], "summari": [65, 66, 72], "summariz": [0, 65], "summarize_featur": 69, "suppl": 6, "support": [1, 15, 42, 61], "suppos": 1, "sure": 30, "swear": 49, "symbol": 67, "syntax": [1, 32, 61], "system": [2, 59, 64], "t": [0, 1, 15, 29, 31, 36, 42, 45, 49, 54, 61, 62, 67], "tabl": [1, 62], "tag": 39, "take": [1, 4, 5, 9, 14, 25, 29, 31, 34, 37, 39, 42, 55, 61, 65, 67, 71], "taken": [59, 71], "talk": [1, 37, 47, 59, 62], "tandem": [1, 61], "target": 15, "task": [1, 2, 59, 71], "tausczik": [12, 37, 41, 52], "tausczikpennebaker2013": 12, "team": [0, 1, 4, 11, 12, 13, 34, 39, 40, 42, 59, 65], "team_bursti": 4, "team_comm_tool": [1, 61], "teamcommtool": 1, "technic": [29, 39, 61, 62], "teghxgbqdhgaaaaa": 5, "tempor": [0, 2, 55, 58, 64, 71], "temporal_featur": 11, "tend": [1, 34, 60], "term": [1, 28, 59, 67], "termin": [1, 2, 61], "terribl": 51, "test": [13, 33, 47], "text": [0, 1, 2, 3, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 24, 28, 32, 33, 36, 42, 48, 55, 62, 64, 67, 71], "text_based_featur": 64, "textblob": [24, 39, 51, 52, 64], "textblob_sentiment_analysi": 11, "than": [0, 1, 2, 11, 13, 31, 34, 35, 36, 37, 40, 41, 45, 46, 54, 60, 62, 63], "thee": 62, "thei": [0, 1, 11, 28, 29, 31, 34, 36, 37, 39, 42, 47, 58, 59, 61, 62, 67], "them": [0, 1, 2, 
19, 28, 29, 31, 36, 50, 51, 55, 59, 61, 62, 64, 65, 66, 67], "themselv": [31, 36, 60], "theoret": 35, "theori": [34, 50], "therebi": 0, "therefor": [0, 1, 11, 28, 37, 45, 59, 62, 69], "thi": [0, 1, 2, 3, 4, 6, 7, 8, 9, 10, 11, 12, 13, 14, 16, 18, 20, 21, 23, 28, 29, 30, 31, 32, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 71, 72, 73], "thing": [48, 61], "think": [1, 38, 47], "thorough": [43, 62], "those": [1, 21, 31, 36, 61, 71], "though": [34, 42, 50], "thought": [1, 35, 45], "thread": [1, 61], "three": [0, 1, 2, 22, 34, 37, 40, 51, 61, 62, 69, 71], "threshold": [15, 47], "through": [1, 45, 46, 50, 61, 62], "throughout": [31, 35, 36, 40, 45, 46, 55, 63], "tht": 35, "thu": [1, 34, 35, 36, 37, 46, 55, 71], "time": [0, 1, 2, 4, 23, 34, 35, 39, 42, 48, 51, 55, 59, 61, 62, 63, 64, 65, 66, 71], "time_diff": 55, "timediff": 4, "timestamp": [0, 1, 2, 8, 23, 58, 61, 62, 63, 64, 71], "timestamp_col": [0, 1, 2, 8, 61, 63, 64, 65, 71], "timestamp_end": [1, 23, 61], "timestamp_start": [1, 23, 61], "timestamp_unit": [0, 2, 23, 64], "to_datetim": [0, 2], "todai": [34, 35, 41, 43, 45, 46, 47], "todo": 66, "togeth": [0, 62, 66], "token": [16, 19, 39, 49, 54, 64, 67], "token_count": [19, 49], "too": [30, 31, 36, 62], "took": [1, 59], "tool": [1, 61, 62], "toolkit": [0, 1, 11, 42, 45, 46, 55, 62, 65, 66], "top": [1, 50, 59], "topic": [1, 13, 31, 34, 40, 42, 43, 65], "tormala": 5, "total": [0, 1, 3, 12, 16, 25, 31, 34, 36, 37, 41, 44, 53, 59, 60, 61, 62, 63, 64, 66, 72], "touch": [1, 61], "toward": [31, 36, 38, 42, 45, 46], "track": [65, 66], "tradit": 49, "train": [1, 2, 15, 64], "train_spacy_n": 15, "transcript": 0, "transfom": [45, 46], "transform": [1, 31, 34, 35, 36, 51], "transform_utter": 50, "treat": [0, 1, 42, 59, 61], "tri": 50, "trivial": [3, 44, 62], "troubl": [1, 61], "true": [0, 1, 2, 37, 61, 63, 65, 66, 67, 71], "truncat": 2, "truth_intensifi": 49, "ttr": 64, "tupl": [0, 1, 2, 15, 
19, 64], "turn": [0, 2, 25, 28, 31, 32, 37, 39, 61, 64, 65, 71], "turn_count": 59, "turn_df": 71, "turn_id": 71, "turn_taking_featur": 11, "twice": 63, "twitter": [1, 51, 61], "two": [0, 1, 2, 23, 31, 34, 36, 41, 45, 46, 52, 62, 63, 67], "txt": 19, "type": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 28, 37, 39, 52, 63, 64, 65, 66, 67, 68, 70, 71, 72, 73], "typic": [1, 34, 40, 41, 42, 52, 60], "u": [0, 1, 2, 22, 31, 36, 48, 49, 58], "uci": 16, "uh": [31, 36], "ulrich": 55, "um": [31, 36, 60], "umbrella": [8, 29, 34], "uncertain": [5, 30], "uncertainti": 30, "under": [0, 1, 10, 11, 12, 28, 40], "underli": [1, 61], "underscor": [1, 42, 61], "understand": [0, 33, 39, 43, 48, 58, 61, 62], "understood": 33, "unhash": 71, "uninterrupt": 59, "uniqu": [0, 1, 2, 6, 9, 13, 16, 23, 25, 41, 47, 52, 60, 61, 63, 71], "unit": [0, 2, 23], "univers": 62, "unix": 58, "unless": [31, 36], "unpack": 62, "unpreprocess": 0, "until": [31, 36, 45, 46], "unzip": [1, 61], "up": [1, 17, 21, 28, 31, 35, 36, 37, 42, 45, 46, 51, 59, 61, 67], "updat": [1, 9, 40, 54, 61], "upenn": 1, "upgrad": 50, "upload": 13, "upon": 33, "us": [0, 2, 3, 5, 11, 12, 13, 17, 19, 24, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 40, 41, 42, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 56, 57, 60, 62, 64, 65, 66, 67, 71], "usag": [0, 21, 24], "use_gpu": [0, 1, 2, 67], "use_time_if_poss": 63, "user": [0, 1, 2, 9, 14, 15, 22, 37, 42, 47, 48, 51, 61, 62, 63, 64, 65, 66, 69, 72], "user_aggreg": [0, 1, 2, 65, 66], "user_column": [0, 1, 2, 65, 66], "user_data": [2, 65, 66], "user_df": 9, "user_level_featur": 2, "user_list": 9, "user_method": [0, 1, 2, 65, 66], "userlevelfeaturescalcul": [2, 66, 69], "usernam": [22, 48], "utf": 1, "util": [1, 12, 21, 61, 62, 63, 64, 65, 66, 67, 68, 70, 71, 72, 73], "utilti": 62, "utter": [0, 1, 2, 3, 4, 5, 13, 14, 15, 16, 17, 20, 21, 23, 24, 30, 31, 32, 34, 35, 36, 37, 38, 40, 41, 42, 43, 44, 45, 46, 50, 51, 52, 54, 58, 60, 61, 67], "v": [0, 1, 13, 61], 
"v0": 0, "valenc": 51, "valid": [1, 55, 67, 71], "valu": [0, 1, 2, 5, 6, 10, 12, 13, 18, 19, 28, 30, 31, 34, 36, 37, 40, 41, 42, 45, 46, 47, 55, 59, 61, 64, 67, 68, 72, 73], "valueerror": [2, 71], "vari": [13, 31, 34, 35], "variabl": [1, 56, 57, 64, 65, 66], "varianc": [1, 8, 34], "variance_in_dd": 11, "variat": [4, 32], "varieti": [42, 62], "variou": [19, 42, 64, 65, 66], "vast": 62, "ve": [0, 31, 36, 50, 61], "vec": 6, "vect_data": [1, 7, 8, 28, 61, 64, 65, 66], "vect_path": 67, "vector": [0, 2, 6, 7, 8, 13, 28, 34, 35, 40, 55, 61, 64, 65, 67], "vector_data": [0, 1, 2, 61], "vector_directori": [0, 1, 2, 61, 65], "vein": 45, "verb": [19, 31, 36], "verbal": 32, "veri": [5, 28, 30, 31, 34, 35, 36, 42, 49, 54], "verifi": 2, "verify_timestamp_format": 2, "verit": 62, "version": [0, 1, 12, 14, 21, 28, 31, 40, 42, 50, 51, 61], "versu": [4, 29, 47, 55, 59], "vert": 2, "via": [3, 44], "view": 50, "visit": 41, "voila": 62, "w": [31, 42], "wa": [0, 1, 2, 5, 12, 31, 32, 35, 36, 47, 51, 56, 59, 62, 71], "wai": [0, 1, 2, 29, 30, 31, 32, 34, 49, 50, 54, 56, 57, 61, 62, 66], "waiai": 62, "wait": [4, 55], "walk": 1, "walkthrough": [0, 61, 62], "want": [1, 28, 34, 59, 61, 62, 65, 66, 67], "warn": [1, 50, 71], "watt": [1, 2, 62, 71], "we": [0, 1, 2, 3, 4, 5, 9, 10, 11, 12, 15, 16, 18, 24, 28, 29, 30, 31, 34, 35, 36, 37, 40, 41, 42, 44, 45, 46, 47, 48, 49, 50, 52, 53, 55, 56, 57, 58, 59, 61, 62, 66, 67, 71], "web": 70, "websit": [1, 61], "week": 47, "weight": 66, "weigt": 31, "welcom": 61, "well": [11, 29, 31, 36, 55, 62], "went": 41, "were": [1, 2, 12, 31, 36, 42, 71], "western": 1, "wh": [19, 31, 36], "wh_question": [32, 49, 54], "wharton": 62, "what": [1, 2, 12, 16, 20, 29, 31, 32, 34, 35, 36, 39, 41, 45, 46, 47, 50, 54, 62, 63], "whatev": [1, 31, 36], "wheel": 62, "when": [1, 16, 20, 31, 33, 36, 42, 47, 54, 55, 59, 60, 61, 62, 67, 69, 71], "whenev": 71, "where": [1, 2, 19, 20, 28, 31, 32, 36, 37, 40, 41, 42, 48, 50, 51, 54, 59, 61, 65, 68, 73], "wherea": [31, 34, 35, 36, 43], 
"wherev": [31, 36], "whether": [1, 2, 10, 16, 19, 32, 37, 38, 41, 43, 47, 57, 58, 62, 63, 64, 67, 71], "which": [0, 1, 2, 3, 4, 5, 7, 9, 12, 13, 15, 16, 18, 25, 28, 31, 34, 35, 36, 37, 38, 40, 41, 42, 51, 53, 54, 55, 56, 57, 58, 59, 61, 62, 64, 65, 66, 67, 68, 69, 71, 72, 73], "while": [1, 31, 32, 34, 36, 37, 44, 45, 46, 55, 62, 71], "whitespac": 43, "who": [1, 20, 31, 32, 36, 47, 51, 54, 59, 60, 62], "whole": [28, 42, 59, 62, 71], "whom": [31, 36, 54], "whose": [31, 36, 54], "why": [20, 29, 31, 36, 54], "wide": 31, "wien": 62, "wiki": [21, 29, 70], "wiki_link": [1, 61], "wikipedia": [21, 33, 37, 70], "williamson": 60, "wish": [1, 2, 18, 28], "within": [0, 1, 2, 8, 11, 16, 28, 30, 31, 34, 35, 36, 41, 45, 46, 52, 55, 59, 60, 62, 63, 64, 68, 71, 73], "within_group": 2, "within_person_discursive_rang": 11, "within_task": [0, 1, 2, 71], "without": [1, 19, 31, 36, 42, 47, 54, 62, 69], "won": [0, 31, 36, 45], "wonder": 56, "woolei": 4, "woollei": [13, 40, 55], "wooten": 55, "word": [0, 1, 3, 10, 11, 12, 13, 14, 16, 19, 20, 21, 22, 28, 30, 32, 33, 37, 38, 39, 40, 41, 43, 45, 46, 48, 49, 52, 53, 54, 56, 57, 62, 64, 65, 66, 67, 69, 70], "word_mimicri": 11, "word_start": [19, 49], "wordcount": [1, 42], "wordnet": [1, 61], "words_in_lin": 19, "work": [0, 11, 42, 47, 50, 55, 61, 62], "world": 55, "worri": 62, "would": [1, 29, 31, 34, 35, 36, 37, 42, 50, 54, 62], "wouldn": [31, 36], "wow": 50, "wp": 13, "wrap": 42, "write": [2, 29, 60], "www": [12, 13, 18, 41, 64], "x": [0, 1, 2, 4, 46, 68], "xinlan": 62, "yashveer": 62, "ye": 19, "yeah": [31, 36], "yeoman": [18, 49, 50], "yesno_quest": [32, 49, 54], "yet": 48, "ylatau": 12, "you": [0, 1, 2, 11, 24, 29, 31, 36, 37, 42, 43, 47, 50, 59, 61, 62, 69], "your": [0, 29, 31, 32, 36, 37, 50, 59, 61, 62], "your_data": 42, "yourself": [31, 36, 50], "yuluan": 62, "yup": [31, 36], "yuxuan": 62, "z": [12, 39, 49, 51, 64, 73], "z0": 67, "za": 67, "zero": [13, 52], "zhang": 62, "zheng": 62, "zhong": 62, "zhou": 62, "zscore": 41, "zscore_chat": 
41, "zscore_chats_and_convers": 69, "zscore_convers": 41, "\u00bc": 47, "\u03c4": 55}, "titles": ["The Basics (Get Started Here!)", "Worked Example", "feature_builder module", "basic_features module", "burstiness module", "certainty module", "discursive_diversity module", "fflow module", "get_all_DD_features module", "get_user_network module", "hedge module", "Features: Technical Documentation", "info_exchange_zscore module", "information_diversity module", "lexical_features_v2 module", "named_entity_recognition_features module", "other_lexical_features module", "politeness_features module", "politeness_v2 module", "politeness_v2_helper module", "question_num module", "readability module", "reddit_tags module", "temporal_features module", "textblob_sentiment_analysis module", "turn_taking_features module", "variance_in_DD module", "within_person_discursive_range module", "word_mimicry module", "FEATURE NAME", "Certainty", "Content Word Accommodation", "Conversational Repair", "Dale-Chall Score", "Discursive Diversity", "Forward Flow", "Function Word Accommodation", "Gini Coefficient", "Hedge", "Features: Conceptual Documentation", "Information Diversity", "Information Exchange", "Linguistic Inquiry and Word Count (LIWC) and Other Lexicons", "Message Length", "Message Quantity", "Mimicry (BERT)", "Moving Mimicry", "Named Entity Recognition", "Online Discussion Tags", "Politeness/Receptiveness Markers", "Politeness Strategies", "Sentiment (RoBERTa)", "Positivity Z-Score", "Proportion of First Person Pronouns", "Question (Naive)", "Team Burstiness", "Textblob Polarity", "Textblob Subjectivity", "Time Difference", "Turn Taking Index", "Word Type-Token Ratio", "The Team Communication Toolkit", "Introduction", "assign_chunk_nums module", "calculate_chat_level_features module", "calculate_conversation_level_features module", "calculate_user_level_features module", "check_embeddings module", "gini_coefficient module", "Utilities", "preload_word_lists module", "preprocess 
module", "summarize_features module", "zscore_chats_and_conversation module"], "titleterms": {"0": 42, "1": 42, "5": 42, "A": 0, "One": 0, "The": [0, 61, 62], "accommod": [31, 36], "addit": 1, "advanc": 1, "aggreg": [1, 11], "analyz": 1, "assign_chunk_num": 63, "assumpt": 0, "base": 11, "basic": [0, 1, 29, 30, 31, 33, 34, 35, 36, 37, 38, 40, 41, 43, 44, 45, 46, 47, 48, 49, 50, 51, 53, 55, 56, 57, 58, 59, 60], "basic_featur": 3, "bert": 45, "bring": 42, "bursti": [4, 55], "cach": 1, "calculate_chat_level_featur": 64, "calculate_conversation_level_featur": 65, "calculate_user_level_featur": 66, "caveat": [1, 29, 30, 31, 33, 34, 35, 36, 38, 40, 41, 43, 44, 45, 46, 47, 48, 49, 50, 51, 53, 55, 56, 57, 58, 59], "certainti": [5, 30], "chall": 33, "chat": [11, 39], "check_embed": 67, "citat": [29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60], "class": 69, "code": [0, 1], "coeffici": 37, "column": [1, 61], "commun": 61, "conceptu": 39, "configur": 1, "consider": 1, "content": [31, 61], "convers": [1, 11, 32, 39, 62, 69], "count": [42, 59], "cumul": 1, "custom": [1, 42], "customiz": 0, "dale": 33, "data": 1, "declar": 61, "demo": [0, 1], "detail": 1, "differ": 58, "directori": 1, "discurs": 34, "discursive_divers": 6, "discuss": 48, "divers": [34, 40], "document": [11, 39, 62], "driver": 69, "entiti": [1, 47], "environ": [1, 61], "exampl": [1, 41, 47], "exchang": 41, "featur": [1, 11, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 69], "feature_build": 2, "featurebuild": [1, 61, 62], "fflow": 7, "file": [1, 30, 34, 35, 45, 46, 47, 51], "first": [1, 53], "flow": 35, "forward": 35, "function": [0, 36], "gener": [1, 61, 62], "get": [0, 1, 61, 62], "get_all_dd_featur": 8, "get_user_network": 9, "gini": 37, "gini_coeffici": 68, "gpu": 1, "group": 1, "hedg": [10, 38], "here": 0, "high": [29, 30, 31, 32, 33, 34, 35, 36, 
37, 38, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60], "implement": [29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60], "import": [1, 61], "index": 59, "indic": 61, "info_exchange_zscor": 12, "inform": [1, 40, 41, 61], "information_divers": 13, "input": [1, 34], "inquiri": 42, "inspect": [1, 61], "interpret": [29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60], "introduct": 62, "intuit": [29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60], "kei": 0, "length": 43, "level": [11, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 62, 69], "lexical_features_v2": 14, "lexicon": 42, "light": 0, "linguist": 42, "liwc": 42, "marker": 49, "messag": [43, 44], "mimicri": [45, 46], "modul": [2, 3, 4, 5, 6, 7, 8, 9, 10, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 63, 64, 65, 66, 67, 68, 70, 71, 72, 73], "motiv": 62, "move": 46, "naiv": 54, "name": [1, 29, 47, 61], "named_entity_recognition_featur": 15, "new": 42, "note": [1, 29, 30, 31, 33, 34, 35, 36, 38, 40, 41, 43, 44, 45, 46, 47, 48, 49, 50, 51, 53, 55, 56, 57, 58, 59], "onlin": 48, "other": [42, 69], "other_lexical_featur": 16, "ouput": 34, "our": 62, "output": [1, 30, 35, 45, 46, 47, 51], "overview": 1, "own": 42, "packag": [0, 1, 61], "paramet": [0, 1], "percentag": 1, "person": 53, "pip": [1, 61], "polar": 56, "polit": [49, 50], "politeness_featur": 17, "politeness_v2": 18, "politeness_v2_help": 19, "posit": 52, "preload_word_list": 70, "preprocess": 71, "pronoun": 53, "proport": 53, "quantiti": 44, "question": 54, "question_num": 20, "ratio": 60, "readabl": 21, "recept": 49, "recognit": [1, 47], "recommend": [1, 61], "reddit_tag": 22, "regener": 1, "relat": [29, 30, 
31, 32, 33, 34, 35, 36, 37, 38, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60], "repair": 32, "roberta": 51, "run": 1, "sampl": [0, 1], "score": [33, 41, 52], "sentiment": 51, "speaker": [11, 59, 62, 69], "start": [0, 1, 61, 62], "strategi": 50, "subject": 57, "summarize_featur": 72, "tabl": 61, "tag": 48, "take": 59, "team": [55, 61, 62], "technic": 11, "temporal_featur": 23, "textblob": [56, 57], "textblob_sentiment_analysi": 24, "time": 58, "token": 60, "toolkit": 61, "touch": 0, "train": 47, "troubleshoot": [1, 61], "turn": [1, 59], "turn_taking_featur": 25, "type": 60, "us": [1, 61], "usag": 1, "user": 11, "util": 69, "utter": [11, 39, 62, 69], "v": 42, "variance_in_dd": 26, "vector": 1, "virtual": [1, 61], "walkthrough": 1, "within_person_discursive_rang": 27, "word": [31, 36, 42, 60], "word_mimicri": 28, "work": 1, "your": [1, 42], "z": [41, 52], "zscore_chats_and_convers": 73}})
\ No newline at end of file
diff --git a/docs/build/html/utils/calculate_chat_level_features.html b/docs/build/html/utils/calculate_chat_level_features.html
index 6e468509..1874a432 100644
--- a/docs/build/html/utils/calculate_chat_level_features.html
+++ b/docs/build/html/utils/calculate_chat_level_features.html
@@ -348,8 +348,7 @@
 <dd><p>Calculate features relevant to the timestamps of each chat.</p>
 <p>This function calculates and appends the following temporal feature to the chat data:
 - Time difference between messages sent</p>
-<p>It checks whether the ‘timestamp’ column is available. If not, it tries to calculate
-using ‘timestamp_start’ and ‘timestamp_end’ columns.</p>
+<p>It assumes the ‘timestamp’ column is available, which is checked in feature_builder.py.</p>
 <dl class="field-list simple">
 <dt class="field-odd">Returns<span class="colon">:</span></dt>
 <dd class="field-odd"><p>None</p>
diff --git a/docs/build/html/utils/check_embeddings.html b/docs/build/html/utils/check_embeddings.html
index 5dc086ce..76302496 100644
--- a/docs/build/html/utils/check_embeddings.html
+++ b/docs/build/html/utils/check_embeddings.html
@@ -111,7 +111,7 @@
 <span id="check-embeddings-module"></span><h1>check_embeddings module<a class="headerlink" href="#module-utils.check_embeddings" title="Link to this heading"></a></h1>
 <dl class="py function">
 <dt class="sig sig-object py" id="utils.check_embeddings.check_embeddings">
-<span class="sig-prename descclassname"><span class="pre">utils.check_embeddings.</span></span><span class="sig-name descname"><span class="pre">check_embeddings</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">chat_data</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">DataFrame</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">vect_path</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">str</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">bert_path</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">str</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">need_sentence</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">bool</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">need_sentiment</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">bool</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">regenerate_vectors</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">bool</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">message_col</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">str</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">'message'</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#utils.check_embeddings.check_embeddings" title="Link to this 
definition"></a></dt>
+<span class="sig-prename descclassname"><span class="pre">utils.check_embeddings.</span></span><span class="sig-name descname"><span class="pre">check_embeddings</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">chat_data</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">DataFrame</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">vect_path</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">str</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">bert_path</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">str</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">need_sentence</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">bool</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">need_sentiment</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">bool</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">regenerate_vectors</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">bool</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">use_gpu</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">bool</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">message_col</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">str</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> 
</span><span class="default_value"><span class="pre">'message'</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#utils.check_embeddings.check_embeddings" title="Link to this definition"></a></dt>
 <dd><p>Check if embeddings and required lexicons exist, and generate them if they don’t.</p>
 <p>This function ensures the necessary vector and BERT embeddings are available.
 It also checks for the presence of certainty and lexicon files, generating them if needed.</p>
@@ -124,6 +124,7 @@
 <li><p><strong>need_sentence</strong> (<em>bool</em>) – Whether at least one feature will require SBERT vectors; we will not need to calculate them otherwise.</p></li>
 <li><p><strong>need_sentiment</strong> (<em>bool</em>) – Whether at least one feature will require the RoBERTa sentiments; we will not need to calculate them otherwise.</p></li>
 <li><p><strong>regenerate_vectors</strong> (<em>bool</em><em>, </em><em>optional</em>) – If true, will regenerate vector data even if it already exists</p></li>
+<li><p><strong>use_gpu</strong> (<em>bool</em>) – If true, will use GPU for embeddings if available; otherwise, will use CPU.</p></li>
 <li><p><strong>message_col</strong> (<em>str</em><em>, </em><em>optional</em>) – A string representing the column name that should be selected as the message. Defaults to “message”.</p></li>
 </ul>
 </dd>
@@ -159,7 +160,7 @@
 
 <dl class="py function">
 <dt class="sig sig-object py" id="utils.check_embeddings.generate_bert">
-<span class="sig-prename descclassname"><span class="pre">utils.check_embeddings.</span></span><span class="sig-name descname"><span class="pre">generate_bert</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">chat_data</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">output_path</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">message_col</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">batch_size</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">64</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#utils.check_embeddings.generate_bert" title="Link to this definition"></a></dt>
+<span class="sig-prename descclassname"><span class="pre">utils.check_embeddings.</span></span><span class="sig-name descname"><span class="pre">generate_bert</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">chat_data</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">output_path</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">message_col</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">device</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">batch_size</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">64</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#utils.check_embeddings.generate_bert" title="Link to this definition"></a></dt>
 <dd><p>Generates RoBERTa sentiment scores for the given chat data and saves them to a CSV file.</p>
 <dl class="field-list simple">
 <dt class="field-odd">Parameters<span class="colon">:</span></dt>
@@ -228,7 +229,7 @@
 
 <dl class="py function">
 <dt class="sig sig-object py" id="utils.check_embeddings.generate_vect">
-<span class="sig-prename descclassname"><span class="pre">utils.check_embeddings.</span></span><span class="sig-name descname"><span class="pre">generate_vect</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">chat_data</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">output_path</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">message_col</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">batch_size</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">64</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#utils.check_embeddings.generate_vect" title="Link to this definition"></a></dt>
+<span class="sig-prename descclassname"><span class="pre">utils.check_embeddings.</span></span><span class="sig-name descname"><span class="pre">generate_vect</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">chat_data</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">output_path</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">message_col</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">device</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">batch_size</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">64</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#utils.check_embeddings.generate_vect" title="Link to this definition"></a></dt>
 <dd><p>Generates sentence vectors for the given chat data and saves them to a CSV file.</p>
 <dl class="field-list simple">
 <dt class="field-odd">Parameters<span class="colon">:</span></dt>
@@ -236,6 +237,7 @@
 <li><p><strong>chat_data</strong> (<em>pd.DataFrame</em>) – Contains message data to be vectorized.</p></li>
 <li><p><strong>output_path</strong> (<em>str</em>) – Path to save the CSV file containing message embeddings.</p></li>
 <li><p><strong>message_col</strong> (<em>str</em><em>, </em><em>optional</em>) – A string representing the column name that should be selected as the message. Defaults to “message”.</p></li>
+<li><p><strong>device</strong> (<em>str</em>) – A string representing the device to use for computation, either “cpu” or “cuda”.</p></li>
 <li><p><strong>batch_size</strong> (<em>int</em>) – The size of each batch for processing sentiment analysis. Defaults to 64.</p></li>
 </ul>
 </dd>
@@ -259,7 +261,7 @@
 
 <dl class="py function">
 <dt class="sig sig-object py" id="utils.check_embeddings.get_sentiment">
-<span class="sig-prename descclassname"><span class="pre">utils.check_embeddings.</span></span><span class="sig-name descname"><span class="pre">get_sentiment</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">texts</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#utils.check_embeddings.get_sentiment" title="Link to this definition"></a></dt>
+<span class="sig-prename descclassname"><span class="pre">utils.check_embeddings.</span></span><span class="sig-name descname"><span class="pre">get_sentiment</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">texts</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">model_bert</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">device</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#utils.check_embeddings.get_sentiment" title="Link to this definition"></a></dt>
 <dd><p>Analyzes the sentiment of the given list of texts using a BERT model and returns a DataFrame with scores for positive, negative, and neutral sentiments.</p>
 <dl class="field-list simple">
 <dt class="field-odd">Parameters<span class="colon">:</span></dt>
diff --git a/docs/build/html/utils/preprocess.html b/docs/build/html/utils/preprocess.html
index 3e00f85a..1d9afed0 100644
--- a/docs/build/html/utils/preprocess.html
+++ b/docs/build/html/utils/preprocess.html
@@ -57,7 +57,6 @@
 <li class="toctree-l2 current"><a class="reference internal" href="index.html#other-utilities">Other Utilities</a><ul class="current">
 <li class="toctree-l3"><a class="reference internal" href="preload_word_lists.html">preload_word_lists module</a></li>
 <li class="toctree-l3 current"><a class="current reference internal" href="#">preprocess module</a><ul>
-<li class="toctree-l4"><a class="reference internal" href="#utils.preprocess.assert_key_columns_present"><code class="docutils literal notranslate"><span class="pre">assert_key_columns_present()</span></code></a></li>
 <li class="toctree-l4"><a class="reference internal" href="#utils.preprocess.compress"><code class="docutils literal notranslate"><span class="pre">compress()</span></code></a></li>
 <li class="toctree-l4"><a class="reference internal" href="#utils.preprocess.create_cumulative_rows"><code class="docutils literal notranslate"><span class="pre">create_cumulative_rows()</span></code></a></li>
 <li class="toctree-l4"><a class="reference internal" href="#utils.preprocess.get_turn_id"><code class="docutils literal notranslate"><span class="pre">get_turn_id()</span></code></a></li>
@@ -106,26 +105,6 @@
              
   <section id="module-utils.preprocess">
 <span id="preprocess-module"></span><h1>preprocess module<a class="headerlink" href="#module-utils.preprocess" title="Link to this heading"></a></h1>
-<dl class="py function">
-<dt class="sig sig-object py" id="utils.preprocess.assert_key_columns_present">
-<span class="sig-prename descclassname"><span class="pre">utils.preprocess.</span></span><span class="sig-name descname"><span class="pre">assert_key_columns_present</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">df</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">DataFrame</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">column_names</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">dict</span></span></em><span class="sig-paren">)</span> <span class="sig-return"><span class="sig-return-icon">&#x2192;</span> <span class="sig-return-typehint"><span class="pre">None</span></span></span><a class="headerlink" href="#utils.preprocess.assert_key_columns_present" title="Link to this definition"></a></dt>
-<dd><p>Ensure that the DataFrame has essential columns and handle missing values.</p>
-<p>This function  if the essential columns <cite>conversation_id_col</cite>, <cite>speaker_id_col</cite>, and
-<cite>message_col</cite> are present. If any of these columns are missing, a
-KeyError is raised.</p>
-<dl class="field-list simple">
-<dt class="field-odd">Parameters<span class="colon">:</span></dt>
-<dd class="field-odd"><ul class="simple">
-<li><p><strong>df</strong> (<em>pandas.DataFrame</em>) – The DataFrame to check and process.</p></li>
-<li><p><strong>column_names</strong> (<em>dict</em>) – Columns to preprocess.</p></li>
-</ul>
-</dd>
-<dt class="field-even">Raises<span class="colon">:</span></dt>
-<dd class="field-even"><p><strong>KeyError</strong> – If one of <cite>conversation_id_col</cite>, <cite>speaker_id_col</cite>, and <cite>message_col</cite> columns is missing.</p>
-</dd>
-</dl>
-</dd></dl>
-
 <dl class="py function">
 <dt class="sig sig-object py" id="utils.preprocess.compress">
 <span class="sig-prename descclassname"><span class="pre">utils.preprocess.</span></span><span class="sig-name descname"><span class="pre">compress</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">turn_df</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">message_col</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#utils.preprocess.compress" title="Link to this definition"></a></dt>
@@ -295,7 +274,7 @@
 
 <dl class="py function">
 <dt class="sig sig-object py" id="utils.preprocess.remove_unhashable_cols">
-<span class="sig-prename descclassname"><span class="pre">utils.preprocess.</span></span><span class="sig-name descname"><span class="pre">remove_unhashable_cols</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">df</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">DataFrame</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">column_names</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">dict</span></span></em><span class="sig-paren">)</span> <span class="sig-return"><span class="sig-return-icon">&#x2192;</span> <span class="sig-return-typehint"><span class="pre">DataFrame</span></span></span><a class="headerlink" href="#utils.preprocess.remove_unhashable_cols" title="Link to this definition"></a></dt>
+<span class="sig-prename descclassname"><span class="pre">utils.preprocess.</span></span><span class="sig-name descname"><span class="pre">remove_unhashable_cols</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">df</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">DataFrame</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">column_names</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">dict</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">warning</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">bool</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">True</span></span></em><span class="sig-paren">)</span> <span class="sig-return"><span class="sig-return-icon">&#x2192;</span> <span class="sig-return-typehint"><span class="pre">DataFrame</span></span></span><a class="headerlink" href="#utils.preprocess.remove_unhashable_cols" title="Link to this definition"></a></dt>
 <dd><p>If a required column contains unhashable types, raise an error.
 Otherwise, remove those columns from the DataFrame and print a warning message.</p>
 <dl class="field-list simple">
diff --git a/docs/source/basics.rst b/docs/source/basics.rst
index 95eba513..1fe30963 100644
--- a/docs/source/basics.rst
+++ b/docs/source/basics.rst
@@ -92,11 +92,13 @@ Here are some parameters that can be customized. For more details, refer to the
 
 5. ``regenerate_vectors``: Force-regenerate vector data even if it already exists.
 
-6. **compute_vectors_from_preprocessed**: Computes vectors using preprocessed text (that is, with capitalization and punctuation removed). This was the default behavior for v.0.1.3 and earlier, but we now default to computing metrics on the unpreprocessed text (which INCLUDES capitalization and punctuation), and this parameter now defaults to False.
+6. ``use_gpu``: If set to True and a GPU is available, the package will generate sentence vectors (SBERT) and RoBERTa sentiments using the GPU. Defaults to False (which means the package will only use the CPU).
 
-7. **custom_liwc_dictionary_path**: Allows the user to "bring their own" LIWC dictionary, and thereby access more recent versions of the LIWC features. Our default version of LIWC is 2007, but users can obtain more recent versions of the lexicon by contacting `Ryan Boyd <https://www.ryanboyd.io/>`_ and `Jamie Pennebaker <https://liberalarts.utexas.edu/psychology/faculty/pennebak>`_. For more information on using the custom LIWC dictionary, please see :ref:`liwc`.
+7. ``compute_vectors_from_preprocessed``: Computes vectors using preprocessed text (that is, with capitalization and punctuation removed). This was the default behavior for v.0.1.3 and earlier, but we now default to computing metrics on the unpreprocessed text (which INCLUDES capitalization and punctuation), and this parameter now defaults to False.
 
-8. **Custom Aggregation of Utterance (Chat)-Level Attributes** (``convo_aggregation``, ``convo_methods``, ``convo_columns``, ``user_aggregation``, ``user_methods``, and ``user_columns``): Customize the ways in which attributes at a lower level of analysis (for example, the number of words in a given message) get aggregated to a higher level of analysis (for example, the total number of words in an entire conversation.) See the Worked Example (:ref:`custom_aggregation`) for details.
+8. ``custom_liwc_dictionary_path``: Allows the user to "bring their own" LIWC dictionary, and thereby access more recent versions of the LIWC features. Our default version of LIWC is 2007, but users can obtain more recent versions of the lexicon by contacting `Ryan Boyd <https://www.ryanboyd.io/>`_ and `Jamie Pennebaker <https://liberalarts.utexas.edu/psychology/faculty/pennebak>`_. For more information on using the custom LIWC dictionary, please see :ref:`liwc`.
+
+9. **Custom Aggregation of Utterance (Chat)-Level Attributes** (``convo_aggregation``, ``convo_methods``, ``convo_columns``, ``user_aggregation``, ``user_methods``, and ``user_columns``): Customize the ways in which attributes at a lower level of analysis (for example, the number of words in a given message) get aggregated to a higher level of analysis (for example, the total number of words in an entire conversation.) See the Worked Example (:ref:`custom_aggregation`) for details.
 
 Example Usage:
 
diff --git a/docs/source/examples.rst b/docs/source/examples.rst
index e349c7d4..bab74995 100644
--- a/docs/source/examples.rst
+++ b/docs/source/examples.rst
@@ -91,10 +91,10 @@ Now we are ready to call the FeatureBuilder on our data. All we need to do is de
 		speaker_id_col = "speaker_nickname",
 		message_col = "message",
 		timestamp_col = "timestamp",
-		grouping_keys = ["batch_num", "round_num"],
+		grouping_keys = ["batch_num", "round_num"], # NOTE: This example demonstrates grouping. Use conversation_id_col if you have a single conversation identifier.
 		vector_directory = "./vector_data/",
 		output_file_base = "jury_output",
-		turns = True
+		turns = True # NOTE: This defaults to False. Decide whether you want to combine successive 'utterances' by the same person as a 'turn.'
 	)
 	jury_feature_builder.featurize()
 
@@ -219,6 +219,12 @@ Regenerating Vector Cache
 
 	* By default, **we assume that, if your output file is named the same, that the underlying vectors are the same**. If this isn't true, you should set **regenerate_vectors = True** in order to clear out the cache and re-generate the RoBERTa and SBERT outputs.
 
+
+Generating Vectors using GPU
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+By default, we use the CPU to generate sentence vectors and cached RoBERTa sentiments. To override this default and use a GPU when available (which will speed up the computation of the vectors), set ``use_gpu`` to True.
+
+
 Custom Features
 ~~~~~~~~~~~~~~~~~
 
diff --git a/pyproject.toml b/pyproject.toml
index 71e84434..07bd0f7f 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -6,7 +6,7 @@ build-backend = "setuptools.build_meta"
 
 [project]
 name = "team_comm_tools"
-version = "0.1.7"
+version = "0.1.8"
 requires-python = ">= 3.10"
 dependencies = [
   "chardet>=3.0.4",
diff --git a/src/team_comm_tools/feature_builder.py b/src/team_comm_tools/feature_builder.py
index 759bfbda..fe433b08 100644
--- a/src/team_comm_tools/feature_builder.py
+++ b/src/team_comm_tools/feature_builder.py
@@ -111,6 +111,8 @@ class FeatureBuilder:
     :param user_columns: Specifies which columns (at the utterance/chat level) to aggregate for the 
         speaker/user level. Defaults to all numeric columns.
     :type user_columns: list, optional
+    :param use_gpu: Specifies whether to use the GPU (if one is available) when generating SBERT sentence vectors and RoBERTa sentiments. Defaults to False.
+    :type use_gpu: bool, optional
     :return: The FeatureBuilder writes the generated features to files in the specified paths. The progress 
         will be printed in the terminal, indicating completion with "All Done!".
     :rtype: None
@@ -144,7 +146,8 @@ def __init__(
             convo_columns: list = None,
             user_aggregation = True,
             user_methods: list = ['mean', 'max', 'min', 'stdev'],
-            user_columns: list = None
+            user_columns: list = None,
+            use_gpu: bool = False
         ) -> None:
 
         # Some error catching
@@ -152,7 +155,7 @@ def __init__(
             raise TypeError(f"Expected a Pandas DataFrame as input_df, but got {type(df).__name__})")
         
         print("Initializing Featurization...")
-
+        input_df = input_df.reset_index(drop=True) # reset index to avoid issues with indexing later on
         ###### Set all parameters ######
         
         assert(all(0 <= x <= 1 for x in analyze_first_pct)) # first, type check that this is a list of numbers between 0 and 1
@@ -180,6 +183,7 @@ def __init__(
         self.user_aggregation = user_aggregation
         self.user_methods = user_methods
         self.user_columns = user_columns
+        self.use_gpu = use_gpu
         # Defining input and output paths.
         self.chat_data = input_df.copy()
         self.orig_data = input_df.copy()
@@ -392,7 +396,7 @@ def __init__(
         self.vect_path = vector_directory + "sentence/" + ("turns" if self.turns else "chats") + "/" + base_file_name        
         self.bert_path = vector_directory + "sentiment/" + ("turns" if self.turns else "chats") + "/" + base_file_name
 
-        check_embeddings(self.chat_data, self.vect_path, self.bert_path, need_sentence, need_sentiment, self.regenerate_vectors, message_col = self.vector_colname)
+        check_embeddings(self.chat_data, self.vect_path, self.bert_path, need_sentence, need_sentiment, self.regenerate_vectors, self.use_gpu, message_col = self.vector_colname)
 
         if(need_sentence):
             self.vect_data = pd.read_csv(self.vect_path, encoding='mac_roman')
@@ -577,6 +581,7 @@ def preprocess_chat_data(self) -> None:
         # create the appropriate grouping variables and assert the columns are present
         self.chat_data = preprocess_conversation_columns(self.chat_data, self.column_names, self.grouping_keys, self.cumulative_grouping, self.within_task)
         self.chat_data = remove_unhashable_cols(self.chat_data, self.column_names)
+        self.orig_data = remove_unhashable_cols(self.orig_data, self.column_names, warning=False) # remove unhashable columns from the original data too to avoid issues with drop_duplicates
 
         # save original column with no preprocessing
         self.chat_data[self.message_col + "_original"] = self.chat_data[self.message_col]
diff --git a/src/team_comm_tools/feature_dict.py b/src/team_comm_tools/feature_dict.py
index 63100e68..69885759 100644
--- a/src/team_comm_tools/feature_dict.py
+++ b/src/team_comm_tools/feature_dict.py
@@ -480,6 +480,7 @@
       "num_emphasis",
       "num_bullet_points",
       "num_numbered_points",
+      "num_line_breaks",
       "num_quotes",
       "num_block_quote_responses",
       "num_ellipses",
diff --git a/src/team_comm_tools/features/reddit_tags.py b/src/team_comm_tools/features/reddit_tags.py
index 8a191bad..44f9cbbe 100644
--- a/src/team_comm_tools/features/reddit_tags.py
+++ b/src/team_comm_tools/features/reddit_tags.py
@@ -1,7 +1,6 @@
-import numpy as np
 import string
 import re
-
+from team_comm_tools.utils.preprocess import EMOJIS
 
 def count_all_caps(text):
     """
@@ -191,6 +190,8 @@ def count_emojis(text):
     Returns:
         int: The number of emojis in the input text.
     """
-    emoji_pattern = r'[:;]-?\)+'
-    emojis = re.findall(emoji_pattern, text)
+    emoji_list = sorted(EMOJIS, key=len, reverse=True)
+    emoji_pattern = "|".join(re.escape(e) for e in emoji_list)
+    compiled_pattern = re.compile(emoji_pattern)
+    emojis = re.findall(compiled_pattern, text)
     return len(emojis)
diff --git a/src/team_comm_tools/utils/calculate_chat_level_features.py b/src/team_comm_tools/utils/calculate_chat_level_features.py
index 052662a2..051544b7 100644
--- a/src/team_comm_tools/utils/calculate_chat_level_features.py
+++ b/src/team_comm_tools/utils/calculate_chat_level_features.py
@@ -470,7 +470,8 @@ def get_reddit_features(self) -> None:
             count_ellipses)
         self.chat_data["num_parentheses"] = self.chat_data["message_lower_with_punc"].apply(
             count_parentheses)
-        self.chat_data["num_emoji"] = self.chat_data["message_lower_with_punc"].apply(
+        self.chat_data["num_emoji"] = self.chat_data[self.message_col +
+                                                        "_original"].apply(
             count_emojis)
 
     def get_named_entity(self) -> None:
diff --git a/src/team_comm_tools/utils/check_embeddings.py b/src/team_comm_tools/utils/check_embeddings.py
index f0b364c4..e5c6eba7 100644
--- a/src/team_comm_tools/utils/check_embeddings.py
+++ b/src/team_comm_tools/utils/check_embeddings.py
@@ -17,10 +17,8 @@
 
 logging.set_verbosity(40) # only log errors
 
-model_vect = SentenceTransformer('all-MiniLM-L6-v2')
 MODEL  = f"cardiffnlp/twitter-roberta-base-sentiment-latest"
 tokenizer = AutoTokenizer.from_pretrained(MODEL)
-model_bert = AutoModelForSequenceClassification.from_pretrained(MODEL)
 os.environ["TOKENIZERS_PARALLELISM"] = "false"
 EMOJIS_TO_PRESERVE = {
     "(:", "(;", "):", "/:", ":(", ":)", ":/", ";)"
@@ -28,7 +26,7 @@
 
 # Check if embeddings exist
 def check_embeddings(chat_data: pd.DataFrame, vect_path: str, bert_path: str, need_sentence: bool, 
-                     need_sentiment: bool, regenerate_vectors: bool, message_col: str = "message"):
+                     need_sentiment: bool, regenerate_vectors: bool, use_gpu: bool, message_col: str = "message"):
     """
     Check if embeddings and required lexicons exist, and generate them if they don't.
 
@@ -47,35 +45,46 @@ def check_embeddings(chat_data: pd.DataFrame, vect_path: str, bert_path: str, ne
     :type need_sentiment: bool
     :param regenerate_vectors: If true, will regenerate vector data even if it already exists
     :type regenerate_vectors: bool, optional
+    :param use_gpu: If true, will use GPU for embeddings if available; otherwise, will use CPU.
+    :type use_gpu: bool
     :param message_col: A string representing the column name that should be selected as the message. Defaults to "message".
     :type message_col: str, optional
 
     :return: None
     :rtype: None
     """
+    device = "cpu"
+    if use_gpu:
+        if torch.cuda.is_available():
+            print("Using GPU for embeddings.")
+            device = "cuda"
+        else:
+            print("GPU not available, using CPU for embeddings.")
+
     if (regenerate_vectors or (not os.path.isfile(vect_path))) and need_sentence:
-        generate_vect(chat_data, vect_path, message_col)
+        generate_vect(chat_data, vect_path, message_col, device)
     if (regenerate_vectors or (not os.path.isfile(bert_path))) and need_sentiment:
-        generate_bert(chat_data, bert_path, message_col)
+        generate_bert(chat_data, bert_path, message_col, device)
 
     try:
         vector_df = pd.read_csv(vect_path)
         # check whether the given vector and bert data matches length of chat data 
         if len(vector_df) != len(chat_data):
             print("ERROR: The length of the vector data does not match the length of the chat data. Regenerating...")
-            generate_vect(chat_data, vect_path, message_col)
+            generate_vect(chat_data, vect_path, message_col, device)
     except FileNotFoundError: # It's OK if we don't have the path, if the sentence vectors are not necessary
         if need_sentence:
-            generate_vect(chat_data, vect_path, message_col)
+            generate_vect(chat_data, vect_path, message_col, device)
 
     try:
         bert_df = pd.read_csv(bert_path)
         if len(bert_df) != len(chat_data):
             print("ERROR: The length of the sentiment data does not match the length of the chat data. Regenerating...")
-            generate_bert(chat_data, bert_path, message_col)
+            # no need to delete the stale file first; generate_bert rewrites the CSV at bert_path
+            generate_bert(chat_data, bert_path, message_col, device)
     except FileNotFoundError:
         if need_sentiment: # It's OK if we don't have the path, if the sentiment features are not necessary
-            generate_bert(chat_data, bert_path, message_col)
+            generate_bert(chat_data, bert_path, message_col, device)
     
     # Get the lexicon pickle(s) if they don't exist
     current_script_directory = Path(__file__).resolve().parent
@@ -348,7 +357,7 @@ def get_nan_vector():
     with open(nan_vector_file_path, "r") as f:
         return str_to_vec(f.read())
 
-def generate_vect(chat_data, output_path, message_col, batch_size = 64):
+def generate_vect(chat_data, output_path, message_col, device, batch_size=64):
     """
     Generates sentence vectors for the given chat data and saves them to a CSV file.
 
@@ -358,6 +367,8 @@ def generate_vect(chat_data, output_path, message_col, batch_size = 64):
     :type output_path: str
     :param message_col: A string representing the column name that should be selected as the message. Defaults to "message".
     :type message_col: str, optional
+    :param device: A string representing the device to use for computation, either "cpu" or "cuda".
+    :type device: str
     :param batch_size: The size of each batch for processing sentiment analysis. Defaults to 64.
     :type batch_size: int
     :raises FileNotFoundError: If the output path is invalid.
@@ -365,6 +376,7 @@ def generate_vect(chat_data, output_path, message_col, batch_size = 64):
     :rtype: None
     """
     print(f"Generating SBERT sentence vectors...")
+    model_vect = SentenceTransformer('all-MiniLM-L6-v2', device=device)
 
     nan_vector = get_nan_vector()
     empty_to_nan = [text if text and text.strip() else None for text in chat_data[message_col].tolist()]
@@ -383,7 +395,7 @@ def generate_vect(chat_data, output_path, message_col, batch_size = 64):
     Path(output_path).parent.mkdir(parents=True, exist_ok=True)
     embedding_df.to_csv(output_path, index=False)
 
-def generate_bert(chat_data, output_path, message_col, batch_size=64):
+def generate_bert(chat_data, output_path, message_col, device, batch_size=64):
     """
     Generates RoBERTa sentiment scores for the given chat data and saves them to a CSV file.
 
@@ -400,20 +412,23 @@ def generate_bert(chat_data, output_path, message_col, batch_size=64):
     :rtype: None
     """
     print(f"Generating RoBERTa sentiments...")
-
+    model_bert = AutoModelForSequenceClassification.from_pretrained(MODEL)
+    model_bert.to(device)
     messages = chat_data[message_col].tolist()
     batch_sentiments_df = pd.DataFrame()
 
+    batch_sentiments_lst = []
     for i in tqdm(range(0, len(messages), batch_size)):
         batch = messages[i:i + batch_size]
-        batch_df = get_sentiment(batch)
-        batch_sentiments_df = pd.concat([batch_sentiments_df, batch_df], ignore_index=True)
-
+        batch_df = get_sentiment(batch, model_bert, device)
+        batch_sentiments_lst.append(batch_df)
+    batch_sentiments_df = pd.concat(batch_sentiments_lst, ignore_index=True)
+    
     # Create directories along the path if they don't exist
     Path(output_path).parent.mkdir(parents=True, exist_ok=True)
     batch_sentiments_df.to_csv(output_path, index=False)
 
-def get_sentiment(texts):
+def get_sentiment(texts, model_bert, device):
     """
     Analyzes the sentiment of the given list of texts using a BERT model and returns a DataFrame with scores for positive, negative, and neutral sentiments.
 
@@ -432,9 +447,11 @@ def get_sentiment(texts):
         return pd.DataFrame(np.nan, index=texts_series.index, columns=['positive_bert', 'negative_bert', 'neutral_bert'])
 
     encoded = tokenizer(non_null_non_empty_texts, padding=True, truncation=True, max_length=512, return_tensors='pt')
-    output = model_bert(**encoded)
+    encoded = {k: v.to(device) for k, v in encoded.items()}
+    with torch.no_grad():
+        output = model_bert(**encoded)
 
-    scores = output[0].detach().numpy()
+    scores = output[0].detach().cpu().numpy()
     scores = softmax(scores, axis=1)
 
     sent_dict = {
diff --git a/src/team_comm_tools/utils/preprocess.py b/src/team_comm_tools/utils/preprocess.py
index 18a31b1d..0234eba8 100644
--- a/src/team_comm_tools/utils/preprocess.py
+++ b/src/team_comm_tools/utils/preprocess.py
@@ -1,7 +1,16 @@
 import re
 import pandas as pd
-import warnings
-
+# import warnings
+
+EMOJIS = {
+    "(:", "(;", "):", "/:", ":(", ":)", ":/", ";)", # 8 emojis from LIWC 2017
+    ";(", # variants
+    ":-)", ":-(", ":-/", ";-)", # with noses
+    ":D", ":P", ":p", ":-D", ":-P", ":-p", # big grin & tongue out
+    ":O", ":-O", ":o", ":-o", # shock
+    "XD", "xD", "xd", # laughing variants
+    "<3", "</3", # hearts
+}
 
 def preprocess_conversation_columns(df: pd.DataFrame, column_names: dict, grouping_keys: list, 
                                     cumulative_grouping: bool = False, within_task: bool = False) -> pd.DataFrame:
@@ -38,7 +47,7 @@ def preprocess_conversation_columns(df: pd.DataFrame, column_names: dict, groupi
         df = df[df.columns.tolist()[-1:] + df.columns.tolist()[0:-1]] # make the new column first
     return df
 
-def remove_unhashable_cols(df: pd.DataFrame, column_names: dict) -> pd.DataFrame:
+def remove_unhashable_cols(df: pd.DataFrame, column_names: dict, warning: bool=True) -> pd.DataFrame:
     """
     If a required column contains unhashable types, raise an error.
     Otherwise, remove those columns from the DataFrame and print a warning message.
@@ -81,8 +90,9 @@ def is_unhashable(obj):
         if col in column_names.values():
             raise ValueError(error_message)
         else:
-            warnings.warn(f"WARNING: {error_message}. Removing '{col}' from the DataFrame.")
             removable_cols.append(col)
+            if warning:
+                print(f"WARNING: {error_message}. Removing '{col}' from the DataFrame.")
     if removable_cols:
         df = df.drop(columns=removable_cols)
     return df
@@ -111,13 +121,10 @@ def preprocess_text(text: str) -> str:
     :return: The processed text containing only alphanumeric characters and spaces in lowercase.
     :rtype: str
     """
-    emojis_to_preserve = {
-        "(:", "(;", "):", "/:", ":(", ":)", ":/", ";)"
-    }
 
     emoji_placeholders = {}
     # Replace each emoji with a unique placeholder
-    for i, emoji in enumerate(emojis_to_preserve):
+    for i, emoji in enumerate(EMOJIS):
         placeholder = f"EMOJI_{i}"
         emoji_placeholders[placeholder] = emoji
         text = text.replace(emoji, placeholder)
@@ -125,7 +132,8 @@ def preprocess_text(text: str) -> str:
     # Clean the text by removing unwanted characters, except placeholders
     text = re.sub(r"[^a-zA-Z0-9 EMOJI_]+", '', text)
     # Restore the preserved emojis by replacing placeholders back to original emojis
-    for placeholder, emoji in emoji_placeholders.items():
+    for placeholder in sorted(emoji_placeholders.keys(), key=len, reverse=True):
+        emoji = emoji_placeholders[placeholder]
         text = text.replace(placeholder, emoji)
 
     return text.lower()
diff --git a/tests/data/cleaned_data/test_chat_level.csv b/tests/data/cleaned_data/test_chat_level.csv
index fb8dc8e0..a463c715 100644
--- a/tests/data/cleaned_data/test_chat_level.csv
+++ b/tests/data/cleaned_data/test_chat_level.csv
@@ -1332,3 +1332,123 @@ K,1,This is the same text.,positivity_zscore_conversation,
 10_mix,first_person,"belief% sed— forgot{ euismod£ dolor/ couldn't¥ ipsum^ amet— affectation] Lorem> ipsum} drank^ Lorem) sed' forever[ amet/ mailed? euismod< sed? mightve[ dolor[ sed; affect' dolor> sed\ consistently' Lorem| euismod: may: tempor, colon( ipsum# xanax¥ dolor] cruel, amet... think– dolor< tempor, strikingly% euismod< cocks£ ipsum~ insides! ipsum. aren't- Lorem. sandy% adipiscing... consectetur) illuminating+ amet# steadiness= euismod¥ sed( except~ consectetur' tempor) most} adipiscing? here's"" tempor' dolor# understand; sed{ ipsum",first_person_lexical_wordcount,0
 10_mix,nltk_english_stopwords,"belief% sed— forgot{ euismod£ dolor/ couldn't¥ ipsum^ amet— affectation] Lorem> ipsum} drank^ Lorem) sed' forever[ amet/ mailed? euismod< sed? mightve[ dolor[ sed; affect' dolor> sed\ consistently' Lorem| euismod: may: tempor, colon( ipsum# xanax¥ dolor] cruel, amet... think– dolor< tempor, strikingly% euismod< cocks£ ipsum~ insides! ipsum. aren't- Lorem. sandy% adipiscing... consectetur) illuminating+ amet# steadiness= euismod¥ sed( except~ consectetur' tempor) most} adipiscing? here's"" tempor' dolor# understand; sed{ ipsum",nltk_english_stopwords_lexical_wordcount,5
 10_mix,hedge_words,"belief% sed— forgot{ euismod£ dolor/ couldn't¥ ipsum^ amet— affectation] Lorem> ipsum} drank^ Lorem) sed' forever[ amet/ mailed? euismod< sed? mightve[ dolor[ sed; affect' dolor> sed\ consistently' Lorem| euismod: may: tempor, colon( ipsum# xanax¥ dolor] cruel, amet... think– dolor< tempor, strikingly% euismod< cocks£ ipsum~ insides! ipsum. aren't- Lorem. sandy% adipiscing... consectetur) illuminating+ amet# steadiness= euismod¥ sed( except~ consectetur' tempor) most} adipiscing? here's"" tempor' dolor# understand; sed{ ipsum",hedge_words_lexical_wordcount,0
+emoji_test,emoji_user_a,Hello :),num_emoji,1
+emoji_test,emoji_user_b,Nice to see you :-)),num_emoji,1
+emoji_test,emoji_user_a,Hey there ;-) How are you? :),num_emoji,2
+emoji_test,emoji_user_b,No emoji here.,num_emoji,0
+emoji_test,emoji_user_a,;) :) :-))),num_emoji,3
+emoji_test,emoji_user_b,Mixed emotions: :-( ;( :( ,num_emoji,3
+emoji_test,emoji_user_a,Too many smiles :) :) :),num_emoji,3
+emoji_test,emoji_user_b,Tricky one: :-)-),num_emoji,1
+emoji_test,emoji_user_a,Extra characters :-))abc:-),num_emoji,2
+emoji_test,emoji_user_b,Combo ;-):-);-),num_emoji,3
+emoji_test,emoji_user_a,Sad day :(,num_emoji,1
+emoji_test,emoji_user_b,Confused face :/,num_emoji,1
+emoji_test,emoji_user_a,Double smiles :) :),num_emoji,2
+emoji_test,emoji_user_b,This is fine :D,num_emoji,1
+emoji_test,emoji_user_a,Edge case :-):-):-D,num_emoji,3
+emoji_test,emoji_user_b,Broken smile :-))abc:-),num_emoji,2
+emoji_test,emoji_user_a,Mixed (: ;),num_emoji,2
+emoji_test,emoji_user_b,Only symbols $%&!,num_emoji,0
+emoji_test,emoji_user_a,Nested :)hello:),num_emoji,2
+emoji_test,emoji_user_b,Sad then happy :( :),num_emoji,2
+emoji_test,emoji_user_a,No spaces:;-):),num_emoji,2
+emoji_test,emoji_user_b,Triple threat :):):),num_emoji,3
+emoji_test,emoji_user_a,Reverse smile ): (:,num_emoji,2
+emoji_test,emoji_user_b,"Hey! I'm doing great :-), thanks for asking. How about you? ;)",num_emoji,2
+emoji_test,emoji_user_a,Ugh... today was rough :( Everything just kept going wrong :-(,num_emoji,2
+emoji_test,emoji_user_b,LOL! That was hilarious :D:D:D!! Can't stop laughing!,num_emoji,3
+emoji_test,emoji_user_a,Okay... so he said 'I'll be there :)' — and then never showed up.,num_emoji,1
+emoji_test,emoji_user_b,"Meeting recaD:
+- Client happy :)
+- Budget approved ;)
+- Launch on track :-D",num_emoji,3
+emoji_test,emoji_user_a,"Meeting recap:
+- Client happy :)
+- Budget approved ;)
+- Launch on track :-D",num_emoji,3
+emoji_test,emoji_user_b,Mixed signals: first he smiled :) then frowned :( then smiled again :)),num_emoji,3
+emoji_test,emoji_user_a,Edge-case test —;-):-(;-))... any of these match?,num_emoji,3
+emoji_test,emoji_user_b,"Email thread:
+> Sure, that works for me :)
+> Thanks! :-)
+> See you then ;)",num_emoji,3
+emoji_test,emoji_user_a,Total chaos!!! :(( :(( :(( — but at least coffee :-),num_emoji,4
+emoji_test,emoji_user_b,"He walked in and said, 'Good morning :-)' — like nothing happened!",num_emoji,1
+emoji_test,emoji_user_a,Haha :D that was funny!,num_emoji,1
+emoji_test,emoji_user_b,You got me :-P,num_emoji,1
+emoji_test,emoji_user_a,Just kidding :p :P,num_emoji,2
+emoji_test,emoji_user_b,All the reactions: :D :-D :P :-P,num_emoji,4
+emoji_test,emoji_user_a,Lowercase alert :-p :p,num_emoji,2
+emoji_test,emoji_user_b,No way! :O,num_emoji,1
+emoji_test,emoji_user_a,You're kidding :-O,num_emoji,1
+emoji_test,emoji_user_b,Mixed shock faces: :o :-o,num_emoji,2
+emoji_test,emoji_user_a,Wow :O :o :-O :-o,num_emoji,4
+emoji_test,emoji_user_b,OMG XD that cracked me up,num_emoji,1
+emoji_test,emoji_user_a,xD lol,num_emoji,1
+emoji_test,emoji_user_b,xd XD xD,num_emoji,3
+emoji_test,emoji_user_a,Try this combo: xdxd,num_emoji,2
+emoji_test,emoji_user_b,Real XDPro was here,num_emoji,1
+emoji_test,emoji_user_a,I love this <3,num_emoji,1
+emoji_test,emoji_user_b,Broken heart </3,num_emoji,1
+emoji_test,emoji_user_a,Love and heartbreak: <3 </3,num_emoji,2
+emoji_test,emoji_user_b,Wow<3cool</3,num_emoji,2
+convokit_test,convokit_user_a,I really appreciate that you've done them.,gratitude_politeness_convokit,1
+convokit_test,convokit_user_b,Nice work so far on your rewrite.,deference_politeness_convokit,1
+convokit_test,convokit_user_a,"Hey, I just tried to reach out",indirect_greeting_politeness_convokit,1
+convokit_test,convokit_user_b,Wow! / This is a great way to deal with it,haspositive_politeness_convokit,1
+convokit_test,convokit_user_a,"If you're going to accuse me, I don't know what to say",hasnegative_politeness_convokit,1
+convokit_test,convokit_user_b,Sorry to bother you about this,apologizing_politeness_convokit,1
+convokit_test,convokit_user_a,Could you please say more about it,please_politeness_convokit,1
+convokit_test,convokit_user_b,Please do not remove warnings for it,please_start_politeness_convokit,1
+convokit_test,convokit_user_a,"By the way, where did you find it",indirect_btw_politeness_convokit,1
+convokit_test,convokit_user_b,What is your native language?,direct_question_politeness_convokit,1
+convokit_test,convokit_user_a,So can you retrieve it or not?,direct_start_politeness_convokit,1
+convokit_test,convokit_user_b,Could/Would you take a look at this?,subjunctive_politeness_convokit,1
+convokit_test,convokit_user_a,Can/Will you help with this?,indicative_politeness_convokit,1
+convokit_test,convokit_user_b,I have just put the article in the folder.,1st_person_start_politeness_convokit,1
+convokit_test,convokit_user_a,Could we find a less complex name for this?,1st_person_pl_politeness_convokit,1
+convokit_test,convokit_user_b,It is my view that this is a good idea.,1st_person_politeness_convokit,1
+convokit_test,convokit_user_a,But what's the good source you have in mind?,2nd_person_politeness_convokit,1
+convokit_test,convokit_user_b,You've reverted yourself to the previous version.,2nd_person_start_politeness_convokit,1
+convokit_test,convokit_user_a,I suggest we start with the basics.,hedges_politeness_convokit,1
+convokit_test,convokit_user_b,"In fact you did link, and it was a mistake.",factuality_politeness_convokit,1
+convokit_test,convokit_user_a,"We could meet tomorrow, or maybe not.",hashedge_politeness_convokit,1
+yeomans_test,yeomans_user_a,What the fuck are you doing?,Swearing_receptiveness_yeomans,1
+yeomans_test,yeomans_user_b,"Don’t worry, it's no big deal.",Reassurance_receptiveness_yeomans,1
+yeomans_test,yeomans_user_a,"This is, um, you know, a complicated scenario",Filler_Pause_receptiveness_yeomans,1
+yeomans_test,yeomans_user_b,Please let me know if you have any questions.,Let_Me_Know_receptiveness_yeomans,1
+yeomans_test,yeomans_user_a,"If you'd like, you could review this.",Give_Agency_receptiveness_yeomans,1
+yeomans_test,yeomans_user_b,"Honestly, I don't know",Truth_Intensifier_receptiveness_yeomans,1
+yeomans_test,yeomans_user_a,"Do me a favor, can you review this by tomorrow?",Ask_Agency_receptiveness_yeomans,1
+yeomans_test,yeomans_user_b,Send me the file.,Bare_Command_receptiveness_yeomans,1
+yeomans_test,yeomans_user_a,Bye I'll see you tomorrow,Goodbye_receptiveness_yeomans,1
+yeomans_test,yeomans_user_b,By the way how do you call this?,By_The_Way_receptiveness_yeomans,1
+yeomans_test,yeomans_user_a,"Dear Mr. Smith, thank you for your feedback.",Formal_Title_receptiveness_yeomans,1
+yeomans_test,yeomans_user_b,Dude that was dope,Informal_Title_receptiveness_yeomans,1
+yeomans_test,yeomans_user_a,"Do me a favour, can you upload it for me? Just allow me to finish another task.",Ask_Agency_receptiveness_yeomans,2
+yeomans_test,yeomans_user_b,"You idiot, what the fuck is going on? That's absurd.",Negative_Emotion_receptiveness_yeomans,3
+yeomans_test,yeomans_user_a,"You simply just need to wait, it's only 5 minutes.",Adverb_Limiter_receptiveness_yeomans,3
+yeomans_test,yeomans_user_b,Send me the file. Fix this bug. Write a summary.,Bare_Command_receptiveness_yeomans,3
+yeomans_test,yeomans_user_a,"I feel like we could approach this differently. In my opinion, that solution might be risky.",Subjectivity_receptiveness_yeomans,2
+yeomans_test,yeomans_user_b,Explain it. Why we want that?,Reasoning_receptiveness_yeomans,2
+yeomans_test,yeomans_user_a,Why did you do it that way? Where did you go?,WH_Questions_receptiveness_yeomans,2
+yeomans_test,yeomans_user_b,I guess they almost complete it.,Hedges_receptiveness_yeomans,2
+yeomans_test,yeomans_user_a,"Ma'am, no my lady, do you know Mr. Smith?",Formal_Title_receptiveness_yeomans,2
+yeomans_test,yeomans_user_b,"I agree, this is correct",Agreement_receptiveness_yeomans,2
+yeomans_test,yeomans_user_a,This is for you. Why you don't understand it's for you?,For_You_receptiveness_yeomans,2
+yeomans_test,yeomans_user_b,"Thank you, I'm really grateful",Gratitude_receptiveness_yeomans,3
+yeomans_test,yeomans_user_a,We here you. We totally understand,Acknowledgement_receptiveness_yeomans,2
+yeomans_test,yeomans_user_b,"Shit. You dumb asshole, what the hell? Who's that bastard? Suck my dick.",Swearing_receptiveness_yeomans,7
+yeomans_test,yeomans_user_a,"Hey hello good morning, oh actually good evening.",Hello_receptiveness_yeomans,4
+yeomans_test,yeomans_user_b,Are you sure? Is this the guy? Did he lie to you?,YesNo_Questions_receptiveness_yeomans,3
+yeomans_test,yeomans_user_a,I'm sorry I sincerely apologize.,Apology_receptiveness_yeomans,2
+yeomans_test,yeomans_user_b,Wow! Amazing! Perfect!,Affirmation_receptiveness_yeomans,3
+yeomans_test,yeomans_user_a,I love you. My friend,First_Person_Single_receptiveness_yeomans,2
+yeomans_test,yeomans_user_b,This is for me? How can you say it's for me? It's not for me!,For_Me_receptiveness_yeomans,3
+yeomans_test,yeomans_user_a,"Honestly, It's actually really important",Truth_Intensifier_receptiveness_yeomans,3
+yeomans_test,yeomans_user_b,"So, what's the deal? And, what did they offer?",Conjunction_Start_receptiveness_yeomans,2
+yeomans_test,yeomans_user_a,Could you please help? Would you mind pause?,Could_You_receptiveness_yeomans,2
+yeomans_test,yeomans_user_b,It's a bad idea. I hate this terrible suggestion.,Disagreement_receptiveness_yeomans,2
+yeomans_test,yeomans_user_a,"Bud, that's so weird bro",Informal_Title_receptiveness_yeomans,2
+yeomans_test,yeomans_user_b,You can take your time. I'll let you work,Give_Agency_receptiveness_yeomans,2
diff --git a/tests/test_feature_metrics.py b/tests/test_feature_metrics.py
index 53a9e7c9..c15b724a 100644
--- a/tests/test_feature_metrics.py
+++ b/tests/test_feature_metrics.py
@@ -26,29 +26,45 @@
     "./output/conv/test_conv_level_conv_complex_ts.csv")
 test_forward_flow_df = pd.read_csv("./output/chat/test_forward_flow_chat.csv")
 test_ner = pd.read_csv('./output/chat/test_named_entity_chat_level.csv')
-
+test_positivity_chat_df = pd.read_csv(
+    "./output/chat/test_positivity_chat_level.csv")
+info_exchange_zscore_chat_df = pd.read_csv(
+    "./output/chat/info_exchange_zscore_chats.csv")
+time_diff_chat_df = pd.read_csv(
+    './output/chat/test_time_pairs_dt_level_chat.csv')
 # Import the Feature Dictionary
 
-chat_features = [feature_dict[feature]["columns"]
-                 for feature in feature_dict.keys() if feature_dict[feature]["level"] == "Chat"]
-conversation_features = [feature_dict[feature]["columns"] for feature in feature_dict.keys(
-) if feature_dict[feature]["level"] == "Conversation"]
-
-num_features_chat = len(list(itertools.chain(*chat_features)))
-num_features_conv = len(list(itertools.chain(*conversation_features)))
+chat_features, conversation_features = [], []
+for feature in feature_dict.keys():
+    if feature_dict[feature]["level"] == "Chat":
+        chat_features.extend(feature_dict[feature]["columns"])
+    elif feature_dict[feature]["level"] == "Conversation":
+        conversation_features.extend(feature_dict[feature]["columns"])
+num_features_chat = len(chat_features)
+num_features_conv = len(conversation_features)
 
 # Print the test coverage
-num_tested_chat = test_chat_df['expected_column'].nunique() + test_chat_complex_df['feature'].nunique() + test_forward_flow_df['feature'].nunique()
 test_chat = test_chat_df['expected_column'].unique().tolist() + test_chat_complex_df['feature'].unique().tolist() + \
-            test_forward_flow_df['feature'].unique().tolist() + ["named_entities"] + ["time_diff"]
-tested_chat = len(set(test_chat))
-num_tested_conv = len(set(test_conv_df['expected_column'].unique().tolist() + test_conv_complex_df['feature'].unique().tolist()))
+            test_forward_flow_df['feature'].unique().tolist() + test_positivity_chat_df['expected_column'].unique().tolist() + \
+            info_exchange_zscore_chat_df['expected_column'].unique().tolist() + time_diff_chat_df['expected_column'].unique().tolist() + \
+            ["named_entities"] + ["num_named_entity"] # These 2 are tested in test_named_entity_recognition()
+num_tested_chat = len(set(test_chat))
+test_conv = test_conv_df['expected_column'].unique().tolist() + test_conv_complex_df['feature'].unique().tolist()
+num_tested_conv = len(set(test_conv))
 tested_features = {}
 
 with open('test.log', 'w') as f:
-    f.write(f'Tested {tested_chat} features out of {num_features_chat} chat level features: {tested_chat/num_features_chat * 100:.2f}% Coverage!\n')
+    f.write(f'Tested {num_tested_chat} features out of {num_features_chat} chat level features: {num_tested_chat/num_features_chat * 100:.2f}% Coverage!\n')
+
+    if(num_tested_chat < num_features_chat):
+        untested_chat_features = set(chat_features).difference(set(test_chat))
+        f.write(f"Currently untested chat features: {untested_chat_features}\n")
+
     f.write(f'Tested {num_tested_conv} features out of {num_features_conv} conv level features: {num_tested_conv/num_features_conv * 100:.2f}% Coverage!\n')
-    pass
+    
+    if(num_tested_conv < num_features_conv):
+        untested_conv_features = set(conversation_features).difference(set(test_conv))  # diff against the conv-level tested names, not the chat-level ones
+        f.write(f"Currently untested conv features: {untested_conv_features}\n")
 
 # ---- MAIN TESTS ------
 
@@ -110,6 +126,7 @@ def test_named_entity_recognition(row):
                 file.write(f"Actual value: {actual}\n")
     else:
         expected = row[1]['expected_value'].split(',')
+        num_named_entity = row[1]['num_named_entity']
         parsed_actual = row[1]['named_entities'].replace(
             " ", "").replace("(", "").replace(")", "").split(',')
         actual = parsed_actual[0::2]
@@ -119,7 +136,7 @@ def test_named_entity_recognition(row):
             actual.pop()
 
         try:
-            assert len(expected) == len(actual)
+            assert len(expected) == len(actual) == num_named_entity
             for named_entity in expected:
                 assert named_entity.lower().strip() in actual
             tested_features["Named Entity Recognition"]['passed'] += 1
@@ -130,8 +147,8 @@ def test_named_entity_recognition(row):
                 file.write("------TEST FAILED------\n")
                 file.write(
                     f"Testing NER for message: {row[1]['message_original']}\n")
-                file.write(f"Expected value: {expected}\n")
-                file.write(f"Actual value: {actual}\n")
+                file.write(f"Expected value: {expected}; Expected num: {len(expected)}\n")
+                file.write(f"Actual value: {actual}; Actual num: {len(actual)}; Num counted: {num_named_entity}\n")
 
             # we don't raise an AssertionError here because NER isn't a perfect feature