Skip to content

Commit

Permalink
Melhoria no Uso de Threads e suporte a Lista de Professores em CSV (#6)
Browse files Browse the repository at this point in the history
* [x] Melhorado uso de threads usando `ThreadPool`
* [x] Opção de utilizar CSV como lista de professores para download/processamento
  • Loading branch information
lucassabreu authored Aug 25, 2017
1 parent 8a8dea4 commit 7035601
Show file tree
Hide file tree
Showing 37 changed files with 3,031 additions and 852 deletions.
1 change: 1 addition & 0 deletions LattesExtractor/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
.data
35 changes: 21 additions & 14 deletions LattesExtractor/App.config
Original file line number Diff line number Diff line change
@@ -1,44 +1,51 @@
<?xml version="1.0" encoding="utf-8"?>
<?xml version="1.0" encoding="utf-8"?>
<configuration>
<configSections>
<section name="entityFramework" type="System.Data.Entity.Internal.ConfigFile.EntityFrameworkSection, EntityFramework, Version=6.0.0.0, Culture=neutral, PublicKeyToken=b77a5c561934e089" requirePermission="false" />
<section name="entityFramework" type="System.Data.Entity.Internal.ConfigFile.EntityFrameworkSection, EntityFramework, Version=6.0.0.0, Culture=neutral, PublicKeyToken=b77a5c561934e089" requirePermission="false"/>
<!-- For more information on Entity Framework configuration, visit http://go.microsoft.com/fwlink/?LinkID=237468 -->
</configSections>
<appSettings>
<!-- Descomentar a linha abaixo para utilizar o REST service https://buscacv.cnpq.br -->
<!-- <add key="UseNewCNPqRestService" value="true" /> -->

<!-- Descomentar a linha abaixo para informar um CSV como lista de currículos para importação -->
<!-- <add key="CSVCurriculumValueNumberList" value="./resources/csv_list_sample.csv" /> -->

<!-- Descomentar a linha abaixo caso deseje fazer a importação de uma pasta no lugar de baixar do Webservice do CNPq -->
<!-- <add key="ImportFolder" value="resources/sample" /> -->
<add key="TempDir" value="./.temp/cnpq" />
<add key="IgnorePedingLastExecution" value="false" />
<add key="LattesCurriculumVitaeODBCConnection" value="Provider=Microsoft.Jet.OLEDB.4.0; Data Source=.\resources\IDs_Curriculos.local.xls; Extended Properties='Excel 8.0;HDR=YES;'" />

<add key="TempDir" value="./.temp/cnpq"/>
<add key="IgnorePedingLastExecution" value="false"/>
<add key="LattesCurriculumVitaeODBCConnection" value="Provider=Microsoft.Jet.OLEDB.4.0; Data Source=.\resources\IDs_Curriculos.local.xls; Extended Properties='Excel 8.0;HDR=YES;'"/>
<!-- Número do Currículo, Nome do Professor, Data de Nascimento e CPF -->
<add key="LattesCurriculumVitaeQuery" value="SELECT [numerocurriculo], [nomecompleto] FROM [ids_lattes$]" />
<add key="ClientSettingsProvider.ServiceUri" value="" />
<add key="LattesCurriculumVitaeQuery" value="SELECT [numerocurriculo], [nomecompleto] FROM [ids_lattes$]"/>
<add key="ClientSettingsProvider.ServiceUri" value=""/>
</appSettings>
<startup>
<supportedRuntime version="v4.0" sku=".NETFramework,Version=v4.5" />
<supportedRuntime version="v4.0" sku=".NETFramework,Version=v4.6.1"/>
</startup>
<system.serviceModel>
<bindings>
<basicHttpBinding>
<binding name="WSCurriculoBinding" closeTimeout="00:03:00" openTimeout="00:03:00" receiveTimeout="00:15:00" sendTimeout="00:03:00" allowCookies="false" bypassProxyOnLocal="false" hostNameComparisonMode="StrongWildcard" maxBufferSize="50000000" maxBufferPoolSize="50000000" maxReceivedMessageSize="50000000" messageEncoding="Text" textEncoding="utf-8" transferMode="Buffered" useDefaultWebProxy="true" />
<binding name="WSCurriculoBinding" closeTimeout="00:03:00" openTimeout="00:03:00" receiveTimeout="00:15:00" sendTimeout="00:03:00" allowCookies="false" bypassProxyOnLocal="false" hostNameComparisonMode="StrongWildcard" maxBufferSize="50000000" maxBufferPoolSize="50000000" maxReceivedMessageSize="50000000" messageEncoding="Text" textEncoding="utf-8" transferMode="Buffered" useDefaultWebProxy="true"/>
</basicHttpBinding>
</bindings>
<client>
<endpoint address="http://servicosweb.cnpq.br/srvcurriculo/WSCurriculo?wsdl" binding="basicHttpBinding" bindingConfiguration="WSCurriculoBinding" contract="CurriculoLattesWebService.WSCurriculo" name="WSCurriculoPort" />
<endpoint address="http://servicosweb.cnpq.br/srvcurriculo/WSCurriculo?wsdl" binding="basicHttpBinding" bindingConfiguration="WSCurriculoBinding" contract="CurriculoLattesWebService.WSCurriculo" name="WSCurriculoPort"/>
</client>
</system.serviceModel>
<entityFramework>
<defaultConnectionFactory type="System.Data.Entity.Infrastructure.LocalDbConnectionFactory, EntityFramework">
<parameters>
<parameter value="v12.0" />
<parameter value="v12.0"/>
</parameters>
</defaultConnectionFactory>
<providers>
<provider invariantName="System.Data.SqlClient" type="System.Data.Entity.SqlServer.SqlProviderServices, EntityFramework.SqlServer" />
<provider invariantName="System.Data.SqlClient" type="System.Data.Entity.SqlServer.SqlProviderServices, EntityFramework.SqlServer"/>
</providers>
</entityFramework>
<connectionStrings>
<!-- add name="LattesDatabase" connectionString="metadata=res://*/Entities.Database.LattesDatabase.csdl|res://*/Entities.Database.LattesDatabase.ssdl|res://*/Entities.Database.LattesDatabase.msl;provider=System.Data.SqlClient;provider connection string=&quot;data source=172.18.0.21;initial catalog=dblattes;integrated security=False;User ID=bob;Password=bob;;MultipleActiveResultSets=True;App=EntityFramework&quot;" providerName="System.Data.EntityClient" /-->
<add name="LattesDatabase" connectionString="metadata=res://*/Entities.Database.LattesDatabase.csdl|res://*/Entities.Database.LattesDatabase.ssdl|res://*/Entities.Database.LattesDatabase.msl;provider=System.Data.SqlClient;provider connection string=&quot;data source=localhost\SQLEXPRESS;initial catalog=dblattes;integrated security=True;MultipleActiveResultSets=True;App=EntityFramework&quot;" providerName="System.Data.EntityClient" />
<add name="LattesDatabase" connectionString="metadata=res://*/Entities.Database.LattesDatabase.csdl|res://*/Entities.Database.LattesDatabase.ssdl|res://*/Entities.Database.LattesDatabase.msl;provider=System.Data.SqlClient;provider connection string=&quot;data source=localhost\SQLEXPRESS;initial catalog=dblattes;integrated security=True;MultipleActiveResultSets=True;App=EntityFramework&quot;" providerName="System.Data.EntityClient"/>
</connectionStrings>
</configuration>
</configuration>
65 changes: 65 additions & 0 deletions LattesExtractor/Collections/Channel.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
using System;
using System.Collections;
using System.Collections.Concurrent;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;

namespace LattesExtractor.Collections
{
// https://codereview.stackexchange.com/questions/32500/golang-channel-in-c#answer-32510
/// <summary>
/// Minimal Go-style channel: a bounded FIFO buffer backed by a
/// <see cref="BlockingCollection{T}"/> wrapping a <see cref="ConcurrentQueue{T}"/>.
/// Producers call <see cref="Send"/>, consumers call <see cref="Receive"/> or iterate
/// <see cref="Range"/>, and <see cref="Close"/> signals that no more items will be sent.
/// </summary>
/// <typeparam name="T">Type of the items carried by the channel.</typeparam>
public class Channel<T> : IDisposable
{
    // Assigned once in the constructor; owns wait handles, hence the Dispose support below.
    private readonly BlockingCollection<T> _buffer;

    /// <summary>Creates a channel whose buffer holds a single item.</summary>
    public Channel() : this(1) { }

    /// <summary>Creates a channel whose buffer holds at most <paramref name="size"/> items.</summary>
    /// <param name="size">Bounded capacity handed to the underlying <see cref="BlockingCollection{T}"/>.</param>
    public Channel(int size)
    {
        _buffer = new BlockingCollection<T>(new ConcurrentQueue<T>(), size);
    }

    /// <summary>
    /// Adds <paramref name="t"/> to the channel, blocking while the buffer is full.
    /// </summary>
    /// <param name="t">Item to enqueue.</param>
    /// <returns>
    /// <c>true</c> when the item was enqueued; <c>false</c> when the channel has been
    /// closed (or disposed) and no longer accepts items.
    /// </returns>
    public bool Send(T t)
    {
        try
        {
            _buffer.Add(t);
            return true;
        }
        catch (InvalidOperationException)
        {
            // Thrown once the collection has been marked as complete for adding;
            // ObjectDisposedException derives from it, so a disposed channel lands here too.
            return false;
        }
    }

    /// <summary>
    /// Takes the next item from the channel, blocking while it is empty and still open.
    /// </summary>
    /// <param name="val">The received item, or <c>default(T)</c> when nothing was received.</param>
    /// <returns>
    /// <c>true</c> when an item was received; <c>false</c> when the channel is closed
    /// and drained (or disposed).
    /// </returns>
    public bool Receive(out T val)
    {
        try
        {
            val = _buffer.Take();
            return true;
        }
        catch (InvalidOperationException)
        {
            // Thrown when the collection is empty and has been closed.
            val = default(T);
            return false;
        }
    }

    /// <summary>
    /// Marks the channel as closed: subsequent <see cref="Send"/> calls return <c>false</c>
    /// and consumers stop once the remaining buffered items have been received.
    /// </summary>
    public void Close()
    {
        _buffer.CompleteAdding();
    }

    /// <summary>
    /// Lazily yields items as they are received, ending when <see cref="Receive"/>
    /// reports that the channel is closed and empty.
    /// </summary>
    /// <returns>A consuming sequence; every yielded item is removed from the channel.</returns>
    public IEnumerable<T> Range()
    {
        while (Receive(out T val))
        {
            yield return val;
        }
    }

    /// <summary>
    /// Releases the resources held by the underlying <see cref="BlockingCollection{T}"/>.
    /// Call only after all producers and consumers have finished with the channel.
    /// </summary>
    public void Dispose()
    {
        Dispose(true);
        GC.SuppressFinalize(this);
    }

    /// <summary>Disposes the underlying buffer when called from <see cref="Dispose()"/>.</summary>
    /// <param name="disposing"><c>true</c> when invoked through <see cref="Dispose()"/>.</param>
    protected virtual void Dispose(bool disposing)
    {
        if (disposing)
        {
            _buffer.Dispose();
        }
    }
}
}
164 changes: 73 additions & 91 deletions LattesExtractor/Controller/CurriculumVitaeProcessorController.cs
Original file line number Diff line number Diff line change
Expand Up @@ -10,126 +10,108 @@
using LattesExtractor.Entities.Xml;
using LattesExtractor.DAO;
using log4net;
using LattesExtractor.Collections;

namespace LattesExtractor.Controller
{
class CurriculumVitaeProcessorController
{
private LattesModule lattesModule = null;
private int _sequence = 0;

private static readonly ILog Logger = LogManager.GetLogger(typeof(CurriculumVitaeProcessorController).Name);

internal static void ProcessCurriculumVitaes(LattesModule lattesModule)
{
List<Thread> threads = new List<Thread>();
int i = 0;

CurriculumVitaeProcessorController pcvt = new CurriculumVitaeProcessorController(lattesModule, i++);
threads.Add(new Thread(new ThreadStart(pcvt.ThreadRun)));
private LattesModule _lattesModule;
private Channel<CurriculoEntry> _curriculumVitaeForProcess;
private int _workItemCount = 0;
private XmlSerializer _curriculumVitaeUnserializer = new XmlSerializer(typeof(CurriculoVitaeXml));

pcvt = new CurriculumVitaeProcessorController(lattesModule, i++);
threads.Add(new Thread(new ThreadStart(pcvt.ThreadRun)));

pcvt = new CurriculumVitaeProcessorController(lattesModule, i++);
threads.Add(new Thread(new ThreadStart(pcvt.ThreadRun)));

pcvt = new CurriculumVitaeProcessorController(lattesModule, i++);
threads.Add(new Thread(new ThreadStart(pcvt.ThreadRun)));
public CurriculumVitaeProcessorController(
LattesModule lattesModule,
Channel<CurriculoEntry> curriculumVitaeForProcess
)
{
_lattesModule = lattesModule;
_curriculumVitaeForProcess = curriculumVitaeForProcess;
}

// inicia as threads
i = 0;
foreach (Thread t in threads)
public void ProcessCurriculumVitaes(ManualResetEvent doneEvent)
{
try
{
t.Name = String.Format("Thread {0}", i);
t.Start();
var processDoneEvent = new ManualResetEvent(false);
foreach (var curriculoEntry in _curriculumVitaeForProcess.Range())
{
Interlocked.Increment(ref _workItemCount);
ThreadPool.QueueUserWorkItem(o => ProcessCurriculumVitae(curriculoEntry, processDoneEvent));
}
if (_workItemCount > 0)
{
processDoneEvent.WaitOne();
}
}

// espera os processos concluirem a execução
foreach (Thread t in threads)
finally
{
t.Join();
doneEvent.Set();
}
}

public CurriculumVitaeProcessorController(LattesModule lattesModule, int seq)
private void ProcessCurriculumVitae(CurriculoEntry curriculoEntry, ManualResetEvent doneEvent)
{
this.lattesModule = lattesModule;
this._sequence = seq;
}

public void ThreadRun()
{
XmlSerializer curriculumVitaeUnserializer = new XmlSerializer(typeof(CurriculoVitaeXml));

XmlDocument curriculumVitaeXml;
XDocument curriculumVitaeXDocument;
CurriculoVitaeXml curriculumVitae;

CurriculoEntry curriculoEntry;
string filename;

var lattesDatabase = new LattesDatabase();

ProfessorDAOService professorDAOService = new ProfessorDAOService(lattesDatabase);

while (lattesModule.HasNextCurriculumVitaeForProcess)
try
{
curriculoEntry = lattesModule.GetNextCurriculumVitaeForProcess();

// para ie caso da thread não conseguir pegar ie ultimo arquivo v tempo
if (curriculoEntry == null)
continue;
var filename = _lattesModule.GetCurriculumVitaeFileName(curriculoEntry.NumeroCurriculo);

filename = lattesModule.GetCurriculumVitaeFileName(curriculoEntry.NumeroCurriculo);
//curriculumXMLFile = new FileStream(filename, FileMode.Open);

curriculumVitaeXml = new XmlDocument();
//curriculumVitaeXml.Load(curriculumXMLFile);
var curriculumVitaeXml = new XmlDocument();
curriculumVitaeXml.Load(filename);

// nescessário para ie deserialize reconhecer ie Xml
// nescessário para o deserialize reconhecer o Xml
curriculumVitaeXml.DocumentElement.SetAttribute("xmlns", "http://tempuri.org/LMPLCurriculo");

curriculumVitaeXDocument = XDocument.Parse(curriculumVitaeXml.InnerXml);
var curriculumVitaeXDocument = XDocument.Parse(curriculumVitaeXml.InnerXml);
var curriculumVitae = _curriculumVitaeUnserializer.Deserialize(curriculumVitaeXDocument.CreateReader()) as CurriculoVitaeXml;
curriculoEntry.NomeProfessor = curriculumVitae.DADOSGERAIS.NOMECOMPLETO;

var professorDAOService = new ProfessorDAOService(new LattesDatabase());
Logger.Debug(String.Format(
"Iniciando processamento currículo {0} do Professor {1}...",
curriculoEntry.NumeroCurriculo,
curriculumVitae.DADOSGERAIS.NOMECOMPLETO
));

try
if (professorDAOService.ProcessCurriculumVitaeXML(curriculumVitae, curriculoEntry))
{
curriculumVitae = (CurriculoVitaeXml)curriculumVitaeUnserializer.Deserialize(curriculumVitaeXDocument.CreateReader());
Logger.Info($"Currículo {curriculoEntry.NumeroCurriculo} do Professor {curriculumVitae.DADOSGERAIS.NOMECOMPLETO} processado com sucesso !");
File.Delete(filename);
}
catch (Exception ex)

}
catch (Exception ex)
{
Logger.Error(String.Format(
"Erro durante a leitura do XML {0}: {1}\n{2}",
curriculoEntry.NumeroCurriculo,
ex.Message,
ex.StackTrace
));

int sequencia = 1;
while (ex.InnerException != null)
{
Logger.Error(String.Format("Erro durante a leitura do XML:", ex.Message));
Logger.Error(ex.StackTrace);
if (ex.InnerException != null)
{
Logger.Error("Excessão Interna:");
int sequencia = 1;
while (ex.InnerException != null)
{
ex = ex.InnerException;
Logger.Error(String.Format("Excessão Interna [{0}]: {1}", sequencia++, ex.Message));
Logger.Error(ex.StackTrace);
}
}
continue;
ex = ex.InnerException;
Logger.Error(String.Format(
"Excessão Interna [{0}]: {1}\n{2}",
sequencia++,
ex.Message,
ex.StackTrace
));
}

// limpa ponteiros
curriculumVitaeXDocument = null;
curriculumVitaeXml = null;

Logger.Info(String.Format("Processando XML {0} do Professor {1} [Thread {2}]", curriculoEntry.NumeroCurriculo, curriculumVitae.DADOSGERAIS.NOMECOMPLETO, this._sequence));

// processa curriculo
if (professorDAOService.ProcessCurriculumVitaeXML(curriculumVitae, curriculoEntry))
File.Delete(filename);
else {
lattesDatabase.Database.Connection.Close();
lattesDatabase = new LattesDatabase();
professorDAOService.LattesDatabase = lattesDatabase;
}
finally
{
if (Interlocked.Decrement(ref _workItemCount) == 0)
{
doneEvent.Set();
}
}
}
}
}
}
Loading

0 comments on commit 7035601

Please sign in to comment.